diff options
Diffstat (limited to 'drivers/misc')
94 files changed, 34950 insertions, 0 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig new file mode 100644 index 00000000000..e3551d20464 --- /dev/null +++ b/drivers/misc/Kconfig @@ -0,0 +1,300 @@ +# +# Misc strange devices +# + +menuconfig MISC_DEVICES + bool "Misc devices" + default y + ---help--- + Say Y here to get to see options for device drivers from various + different categories. This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if MISC_DEVICES + +config AD525X_DPOT + tristate "Analog Devices AD525x Digital Potentiometers" + depends on I2C && SYSFS + help + If you say yes here, you get support for the Analog Devices + AD5258, AD5259, AD5251, AD5252, AD5253, AD5254 and AD5255 + digital potentiometer chips. + + See Documentation/misc-devices/ad525x_dpot.txt for the + userspace interface. + + This driver can also be built as a module. If so, the module + will be called ad525x_dpot. + +config ATMEL_PWM + tristate "Atmel AT32/AT91 PWM support" + depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91CAP9 + help + This option enables device driver support for the PWM channels + on certain Atmel processors. Pulse Width Modulation is used for + purposes including software controlled power-efficient backlights + on LCD displays, motor control, and waveform generation. + +config ATMEL_TCLIB + bool "Atmel AT32/AT91 Timer/Counter Library" + depends on (AVR32 || ARCH_AT91) + help + Select this if you want a library to allocate the Timer/Counter + blocks found on many Atmel processors. This facilitates using + these blocks by different drivers despite processor differences. + +config ATMEL_TCB_CLKSRC + bool "TC Block Clocksource" + depends on ATMEL_TCLIB && GENERIC_TIME + default y + help + Select this to get a high precision clocksource based on a + TC block with a 5+ MHz base clock rate. Two timer channels + are combined to make a single 32-bit timer. 
+ + When GENERIC_CLOCKEVENTS is defined, the third timer channel + may be used as a clock event device supporting oneshot mode + (delays of up to two seconds) based on the 32 KiHz clock. + +config ATMEL_TCB_CLKSRC_BLOCK + int + depends on ATMEL_TCB_CLKSRC + prompt "TC Block" if ARCH_AT91RM9200 || ARCH_AT91SAM9260 || CPU_AT32AP700X + default 0 + range 0 1 + help + Some chips provide more than one TC block, so you have the + choice of which one to use for the clock framework. The other + TC can be used for other purposes, such as PWM generation and + interval timing. + +config IBM_ASM + tristate "Device driver for IBM RSA service processor" + depends on X86 && PCI && INPUT && EXPERIMENTAL + ---help--- + This option enables device driver support for in-band access to the + IBM RSA (Condor) service processor in eServer xSeries systems. + The ibmasm device driver allows user space application to access + ASM (Advanced Systems Management) functions on the service + processor. The driver is meant to be used in conjunction with + a user space API. + The ibmasm driver also enables the OS to use the UART on the + service processor board as a regular serial port. To make use of + this feature serial driver support (CONFIG_SERIAL_8250) must be + enabled. + + WARNING: This software may not be supported or function + correctly on your IBM server. Please consult the IBM ServerProven + website <http://www.pc.ibm.com/ww/eserver/xseries/serverproven> for + information on the specific driver level and support statement + for your IBM server. + +config PHANTOM + tristate "Sensable PHANToM (PCI)" + depends on PCI + help + Say Y here if you want to build a driver for Sensable PHANToM device. + + This driver is only for PCI PHANToMs. + + If you choose to build module, its name will be phantom. If unsure, + say N here. 
+ +config SGI_IOC4 + tristate "SGI IOC4 Base IO support" + depends on PCI + ---help--- + This option enables basic support for the IOC4 chip on certain + SGI IO controller cards (IO9, IO10, and PCI-RT). This option + does not enable any specific functions on such a card, but provides + necessary infrastructure for other drivers to utilize. + + If you have an SGI Altix with an IOC4-based card say Y. + Otherwise say N. + +config TIFM_CORE + tristate "TI Flash Media interface support (EXPERIMENTAL)" + depends on EXPERIMENTAL && PCI + help + If you want support for Texas Instruments(R) Flash Media adapters + you should select this option and then also choose an appropriate + host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter + support', if you have a TI PCI74xx compatible card reader, for + example. + You will also have to select some flash card format drivers. MMC/SD + cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD + Interface support (MMC_TIFM_SD)'. + + To compile this driver as a module, choose M here: the module will + be called tifm_core. + +config TIFM_7XX1 + tristate "TI Flash Media PCI74xx/PCI76xx host adapter support (EXPERIMENTAL)" + depends on PCI && TIFM_CORE && EXPERIMENTAL + default TIFM_CORE + help + This option enables support for Texas Instruments(R) PCI74xx and + PCI76xx families of Flash Media adapters, found in many laptops. + To make actual use of the device, you will have to select some + flash card format drivers, as outlined in the TIFM_CORE Help. + + To compile this driver as a module, choose M here: the module will + be called tifm_7xx1. + +config ICS932S401 + tristate "Integrated Circuits ICS932S401" + depends on I2C && EXPERIMENTAL + help + If you say yes here you get support for the Integrated Circuits + ICS932S401 clock control chips. + + This driver can also be built as a module. If so, the module + will be called ics932s401. 
+ +config ATMEL_SSC + tristate "Device driver for Atmel SSC peripheral" + depends on AVR32 || ARCH_AT91 + ---help--- + This option enables device driver support for Atmel Synchronized + Serial Communication peripheral (SSC). + + The SSC peripheral supports a wide variety of serial frame based + communications, i.e. I2S, SPI, etc. + + If unsure, say N. + +config ENCLOSURE_SERVICES + tristate "Enclosure Services" + default n + help + Provides support for intelligent enclosures (bays which + contain storage devices). You also need either a host + driver (SCSI/ATA) which supports enclosures + or a SCSI enclosure device (SES) to use these services. + +config SGI_XP + tristate "Support communication between SGI SSIs" + depends on NET + depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP + select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select SGI_GRU if X86_64 && SMP + ---help--- + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + +config CS5535_MFGPT + tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" + depends on PCI + depends on X86 + default n + help + This driver provides access to MFGPT functionality for other + drivers that need timers. MFGPTs are available in the CS5535 and + CS5536 companion chips that are found in AMD Geode and several + other platforms. They have a better resolution and max interval + than the generic PIT, and are suitable for use as high-res timers. + You probably don't want to enable this manually; other drivers that + make use of it should enable it. 
+ +config CS5535_MFGPT_DEFAULT_IRQ + int + depends on CS5535_MFGPT + default 7 + help + MFGPTs on the CS5535 require an interrupt. The selected IRQ + can be overridden as a module option as well as by drivers that + use the cs5535_mfgpt_ API; however, different architectures might + want to use a different IRQ by default. This is here for + architectures to set as necessary. + +config HP_ILO + tristate "Channel interface driver for HP iLO/iLO2 processor" + depends on PCI + default n + help + The channel interface driver allows applications to communicate + with iLO/iLO2 management processors present on HP ProLiant + servers. Upon loading, the driver creates /dev/hpilo/dXccbN files, + which can be used to gather data from the management processor, + via read and write system calls. + + To compile this driver as a module, choose M here: the + module will be called hpilo. + +config SGI_GRU + tristate "SGI GRU driver" + depends on X86_UV && SMP + default n + select MMU_NOTIFIER + ---help--- + The GRU is a hardware resource located in the system chipset. The GRU + contains memory that can be mmapped into the user address space. This memory is + used to communicate with the GRU to perform functions such as load/store, + scatter/gather, bcopy, AMOs, etc. The GRU is directly accessed by user + instructions using user virtual addresses. GRU instructions (ex., bcopy) use + user virtual addresses for operands. + + If you are not running on an SGI UV system, say N. + +config SGI_GRU_DEBUG + bool "SGI GRU driver debug" + depends on SGI_GRU + default n + ---help--- + This option enables additional debugging code for the SGI GRU driver. If + you are unsure, say N. + +config ISL29003 + tristate "Intersil ISL29003 ambient light sensor" + depends on I2C && SYSFS + help + If you say yes here you get support for the Intersil ISL29003 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called isl29003. 
+ +config EP93XX_PWM + tristate "EP93xx PWM support" + depends on ARCH_EP93XX + help + This option enables device driver support for the PWM channels + on the Cirrus EP93xx processors. The EP9307 chip only has one + PWM channel; all the others have two. The second channel is an + alternate function of the EGPIO14 pin. A sysfs interface is + provided to control the PWM channels. + + To compile this driver as a module, choose M here: the module will + be called ep93xx_pwm. + +config DS1682 + tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm" + depends on I2C && EXPERIMENTAL + help + If you say yes here you get support for Dallas Semiconductor + DS1682 Total Elapsed Time Recorder. + + This driver can also be built as a module. If so, the module + will be called ds1682. + +config TI_DAC7512 + tristate "Texas Instruments DAC7512" + depends on SPI && SYSFS + help + If you say yes here you get support for the Texas Instruments + DAC7512 16-bit digital-to-analog converter. + + This driver can also be built as a module. If so, the module + will be called ti_dac7512. + +source "drivers/misc/c2port/Kconfig" +source "drivers/misc/eeprom/Kconfig" +source "drivers/misc/cb710/Kconfig" +source "drivers/misc/iwmc3200top/Kconfig" + +endif # MISC_DEVICES diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile new file mode 100644 index 00000000000..049ff2482f3 --- /dev/null +++ b/drivers/misc/Makefile @@ -0,0 +1,30 @@ +# +# Makefile for misc devices that really don't fit anywhere else. 
+# + +obj-$(CONFIG_IBM_ASM) += ibmasm/ +obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/ +obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o +obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o +obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o +obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o +obj-$(CONFIG_ICS932S401) += ics932s401.o +obj-$(CONFIG_LKDTM) += lkdtm.o +obj-$(CONFIG_TIFM_CORE) += tifm_core.o +obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o +obj-$(CONFIG_PHANTOM) += phantom.o +obj-$(CONFIG_SGI_IOC4) += ioc4.o +obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o +obj-$(CONFIG_KGDB_TESTS) += kgdbts.o +obj-$(CONFIG_SGI_XP) += sgi-xp/ +obj-$(CONFIG_SGI_GRU) += sgi-gru/ +obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o +obj-$(CONFIG_HP_ILO) += hpilo.o +obj-$(CONFIG_ISL29003) += isl29003.o +obj-$(CONFIG_EP93XX_PWM) += ep93xx_pwm.o +obj-$(CONFIG_DS1682) += ds1682.o +obj-$(CONFIG_TI_DAC7512) += ti_dac7512.o +obj-$(CONFIG_C2PORT) += c2port/ +obj-$(CONFIG_IWMC3200TOP) += iwmc3200top/ +obj-y += eeprom/ +obj-y += cb710/ diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c new file mode 100644 index 00000000000..30a59f2bacd --- /dev/null +++ b/drivers/misc/ad525x_dpot.c @@ -0,0 +1,666 @@ +/* + * ad525x_dpot: Driver for the Analog Devices AD525x digital potentiometers + * Copyright (c) 2009 Analog Devices, Inc. + * Author: Michael Hennerich <hennerich@blackfin.uclinux.org> + * + * DEVID #Wipers #Positions Resistor Options (kOhm) + * AD5258 1 64 1, 10, 50, 100 + * AD5259 1 256 5, 10, 50, 100 + * AD5251 2 64 1, 10, 50, 100 + * AD5252 2 256 1, 10, 50, 100 + * AD5255 3 512 25, 250 + * AD5253 4 64 1, 10, 50, 100 + * AD5254 4 256 1, 10, 50, 100 + * + * See Documentation/misc-devices/ad525x_dpot.txt for more info. + * + * derived from ad5258.c + * Copyright (c) 2009 Cyber Switching, Inc. + * Author: Chris Verges <chrisv@cyberswitching.com> + * + * derived from ad5252.c + * Copyright (c) 2006 Michael Hennerich <hennerich@blackfin.uclinux.org> + * + * Licensed under the GPL-2 or later. 
+ */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/delay.h> + +#define DRIVER_NAME "ad525x_dpot" +#define DRIVER_VERSION "0.1" + +enum dpot_devid { + AD5258_ID, + AD5259_ID, + AD5251_ID, + AD5252_ID, + AD5253_ID, + AD5254_ID, + AD5255_ID, +}; + +#define AD5258_MAX_POSITION 64 +#define AD5259_MAX_POSITION 256 +#define AD5251_MAX_POSITION 64 +#define AD5252_MAX_POSITION 256 +#define AD5253_MAX_POSITION 64 +#define AD5254_MAX_POSITION 256 +#define AD5255_MAX_POSITION 512 + +#define AD525X_RDAC0 0 +#define AD525X_RDAC1 1 +#define AD525X_RDAC2 2 +#define AD525X_RDAC3 3 + +#define AD525X_REG_TOL 0x18 +#define AD525X_TOL_RDAC0 (AD525X_REG_TOL | AD525X_RDAC0) +#define AD525X_TOL_RDAC1 (AD525X_REG_TOL | AD525X_RDAC1) +#define AD525X_TOL_RDAC2 (AD525X_REG_TOL | AD525X_RDAC2) +#define AD525X_TOL_RDAC3 (AD525X_REG_TOL | AD525X_RDAC3) + +/* RDAC-to-EEPROM Interface Commands */ +#define AD525X_I2C_RDAC (0x00 << 5) +#define AD525X_I2C_EEPROM (0x01 << 5) +#define AD525X_I2C_CMD (0x80) + +#define AD525X_DEC_ALL_6DB (AD525X_I2C_CMD | (0x4 << 3)) +#define AD525X_INC_ALL_6DB (AD525X_I2C_CMD | (0x9 << 3)) +#define AD525X_DEC_ALL (AD525X_I2C_CMD | (0x6 << 3)) +#define AD525X_INC_ALL (AD525X_I2C_CMD | (0xB << 3)) + +static s32 ad525x_read(struct i2c_client *client, u8 reg); +static s32 ad525x_write(struct i2c_client *client, u8 reg, u8 value); + +/* + * Client data (each client gets its own) + */ + +struct dpot_data { + struct mutex update_lock; + unsigned rdac_mask; + unsigned max_pos; + unsigned devid; +}; + +/* sysfs functions */ + +static ssize_t sysfs_show_reg(struct device *dev, + struct device_attribute *attr, char *buf, u32 reg) +{ + struct i2c_client *client = to_i2c_client(dev); + struct dpot_data *data = i2c_get_clientdata(client); + s32 value; + + mutex_lock(&data->update_lock); + value = ad525x_read(client, reg); + mutex_unlock(&data->update_lock); 
+ + if (value < 0) + return -EINVAL; + /* + * Let someone else deal with converting this ... + * the tolerance is a two-byte value where the MSB + * is a sign + integer value, and the LSB is a + * decimal value. See page 18 of the AD5258 + * datasheet (Rev. A) for more details. + */ + + if (reg & AD525X_REG_TOL) + return sprintf(buf, "0x%04x\n", value & 0xFFFF); + else + return sprintf(buf, "%u\n", value & data->rdac_mask); +} + +static ssize_t sysfs_set_reg(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, u32 reg) +{ + struct i2c_client *client = to_i2c_client(dev); + struct dpot_data *data = i2c_get_clientdata(client); + unsigned long value; + int err; + + err = strict_strtoul(buf, 10, &value); + if (err) + return err; + + if (value > data->rdac_mask) + value = data->rdac_mask; + + mutex_lock(&data->update_lock); + ad525x_write(client, reg, value); + if (reg & AD525X_I2C_EEPROM) + msleep(26); /* Sleep while the EEPROM updates */ + mutex_unlock(&data->update_lock); + + return count; +} + +static ssize_t sysfs_do_cmd(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, u32 reg) +{ + struct i2c_client *client = to_i2c_client(dev); + struct dpot_data *data = i2c_get_clientdata(client); + + mutex_lock(&data->update_lock); + ad525x_write(client, reg, 0); + mutex_unlock(&data->update_lock); + + return count; +} + +/* ------------------------------------------------------------------------- */ + +static ssize_t show_rdac0(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC0); +} + +static ssize_t set_rdac0(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_RDAC | AD525X_RDAC0); +} + +static DEVICE_ATTR(rdac0, S_IWUSR | S_IRUGO, show_rdac0, set_rdac0); + +static ssize_t show_eeprom0(struct device *dev, + struct 
device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC0); +} + +static ssize_t set_eeprom0(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_EEPROM | AD525X_RDAC0); +} + +static DEVICE_ATTR(eeprom0, S_IWUSR | S_IRUGO, show_eeprom0, set_eeprom0); + +static ssize_t show_tolerance0(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, + AD525X_I2C_EEPROM | AD525X_TOL_RDAC0); +} + +static DEVICE_ATTR(tolerance0, S_IRUGO, show_tolerance0, NULL); + +/* ------------------------------------------------------------------------- */ + +static ssize_t show_rdac1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC1); +} + +static ssize_t set_rdac1(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_RDAC | AD525X_RDAC1); +} + +static DEVICE_ATTR(rdac1, S_IWUSR | S_IRUGO, show_rdac1, set_rdac1); + +static ssize_t show_eeprom1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC1); +} + +static ssize_t set_eeprom1(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_EEPROM | AD525X_RDAC1); +} + +static DEVICE_ATTR(eeprom1, S_IWUSR | S_IRUGO, show_eeprom1, set_eeprom1); + +static ssize_t show_tolerance1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, + AD525X_I2C_EEPROM | AD525X_TOL_RDAC1); +} + +static DEVICE_ATTR(tolerance1, S_IRUGO, show_tolerance1, NULL); + +/* ------------------------------------------------------------------------- */ + +static ssize_t 
show_rdac2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC2); +} + +static ssize_t set_rdac2(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_RDAC | AD525X_RDAC2); +} + +static DEVICE_ATTR(rdac2, S_IWUSR | S_IRUGO, show_rdac2, set_rdac2); + +static ssize_t show_eeprom2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC2); +} + +static ssize_t set_eeprom2(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_EEPROM | AD525X_RDAC2); +} + +static DEVICE_ATTR(eeprom2, S_IWUSR | S_IRUGO, show_eeprom2, set_eeprom2); + +static ssize_t show_tolerance2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, + AD525X_I2C_EEPROM | AD525X_TOL_RDAC2); +} + +static DEVICE_ATTR(tolerance2, S_IRUGO, show_tolerance2, NULL); + +/* ------------------------------------------------------------------------- */ + +static ssize_t show_rdac3(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_RDAC | AD525X_RDAC3); +} + +static ssize_t set_rdac3(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, count, + AD525X_I2C_RDAC | AD525X_RDAC3); +} + +static DEVICE_ATTR(rdac3, S_IWUSR | S_IRUGO, show_rdac3, set_rdac3); + +static ssize_t show_eeprom3(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, AD525X_I2C_EEPROM | AD525X_RDAC3); +} + +static ssize_t set_eeprom3(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_set_reg(dev, attr, buf, 
count, + AD525X_I2C_EEPROM | AD525X_RDAC3); +} + +static DEVICE_ATTR(eeprom3, S_IWUSR | S_IRUGO, show_eeprom3, set_eeprom3); + +static ssize_t show_tolerance3(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_show_reg(dev, attr, buf, + AD525X_I2C_EEPROM | AD525X_TOL_RDAC3); +} + +static DEVICE_ATTR(tolerance3, S_IRUGO, show_tolerance3, NULL); + +static struct attribute *ad525x_attributes_wipers[4][4] = { + { + &dev_attr_rdac0.attr, + &dev_attr_eeprom0.attr, + &dev_attr_tolerance0.attr, + NULL + }, { + &dev_attr_rdac1.attr, + &dev_attr_eeprom1.attr, + &dev_attr_tolerance1.attr, + NULL + }, { + &dev_attr_rdac2.attr, + &dev_attr_eeprom2.attr, + &dev_attr_tolerance2.attr, + NULL + }, { + &dev_attr_rdac3.attr, + &dev_attr_eeprom3.attr, + &dev_attr_tolerance3.attr, + NULL + } +}; + +static const struct attribute_group ad525x_group_wipers[] = { + {.attrs = ad525x_attributes_wipers[AD525X_RDAC0]}, + {.attrs = ad525x_attributes_wipers[AD525X_RDAC1]}, + {.attrs = ad525x_attributes_wipers[AD525X_RDAC2]}, + {.attrs = ad525x_attributes_wipers[AD525X_RDAC3]}, +}; + +/* ------------------------------------------------------------------------- */ + +static ssize_t set_inc_all(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_do_cmd(dev, attr, buf, count, AD525X_INC_ALL); +} + +static DEVICE_ATTR(inc_all, S_IWUSR, NULL, set_inc_all); + +static ssize_t set_dec_all(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_do_cmd(dev, attr, buf, count, AD525X_DEC_ALL); +} + +static DEVICE_ATTR(dec_all, S_IWUSR, NULL, set_dec_all); + +static ssize_t set_inc_all_6db(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_do_cmd(dev, attr, buf, count, AD525X_INC_ALL_6DB); +} + +static DEVICE_ATTR(inc_all_6db, S_IWUSR, NULL, set_inc_all_6db); + +static ssize_t set_dec_all_6db(struct device *dev, + struct 
device_attribute *attr, + const char *buf, size_t count) +{ + return sysfs_do_cmd(dev, attr, buf, count, AD525X_DEC_ALL_6DB); +} + +static DEVICE_ATTR(dec_all_6db, S_IWUSR, NULL, set_dec_all_6db); + +static struct attribute *ad525x_attributes_commands[] = { + &dev_attr_inc_all.attr, + &dev_attr_dec_all.attr, + &dev_attr_inc_all_6db.attr, + &dev_attr_dec_all_6db.attr, + NULL +}; + +static const struct attribute_group ad525x_group_commands = { + .attrs = ad525x_attributes_commands, +}; + +/* ------------------------------------------------------------------------- */ + +/* i2c device functions */ + +/** + * ad525x_read - return the value contained in the specified register + * on the AD5258 device. + * @client: value returned from i2c_new_device() + * @reg: the register to read + * + * If the tolerance register is specified, 2 bytes are returned. + * Otherwise, 1 byte is returned. A negative value indicates an error + * occurred while reading the register. + */ +static s32 ad525x_read(struct i2c_client *client, u8 reg) +{ + struct dpot_data *data = i2c_get_clientdata(client); + + if ((reg & AD525X_REG_TOL) || (data->max_pos > 256)) + return i2c_smbus_read_word_data(client, (reg & 0xF8) | + ((reg & 0x7) << 1)); + else + return i2c_smbus_read_byte_data(client, reg); +} + +/** + * ad525x_write - store the given value in the specified register on + * the AD5258 device. + * @client: value returned from i2c_new_device() + * @reg: the register to write + * @value: the byte to store in the register + * + * For certain instructions that do not require a data byte, "NULL" + * should be specified for the "value" parameter. These instructions + * include NOP, RESTORE_FROM_EEPROM, and STORE_TO_EEPROM. + * + * A negative return value indicates an error occurred while reading + * the register. 
+ */ +static s32 ad525x_write(struct i2c_client *client, u8 reg, u8 value) +{ + struct dpot_data *data = i2c_get_clientdata(client); + + /* Only write the instruction byte for certain commands */ + if (reg & AD525X_I2C_CMD) + return i2c_smbus_write_byte(client, reg); + + if (data->max_pos > 256) + return i2c_smbus_write_word_data(client, (reg & 0xF8) | + ((reg & 0x7) << 1), value); + else + /* All other registers require instruction + data bytes */ + return i2c_smbus_write_byte_data(client, reg, value); +} + +static int ad525x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct dpot_data *data; + int err = 0; + + dev_dbg(dev, "%s\n", __func__); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) { + dev_err(dev, "missing I2C functionality for this driver\n"); + goto exit; + } + + data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + switch (id->driver_data) { + case AD5258_ID: + data->max_pos = AD5258_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + break; + case AD5259_ID: + data->max_pos = AD5259_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + break; + case AD5251_ID: + data->max_pos = AD5251_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC3]); + err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands); + break; + case AD5252_ID: + data->max_pos = AD5252_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC3]); + err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands); + break; + case AD5253_ID: + data->max_pos = 
AD5253_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC2]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC3]); + err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands); + break; + case AD5254_ID: + data->max_pos = AD5254_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC2]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC3]); + err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands); + break; + case AD5255_ID: + data->max_pos = AD5255_MAX_POSITION; + err = sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + err |= sysfs_create_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC2]); + err |= sysfs_create_group(&dev->kobj, &ad525x_group_commands); + break; + default: + err = -ENODEV; + goto exit_free; + } + + if (err) { + dev_err(dev, "failed to register sysfs hooks\n"); + goto exit_free; + } + + data->devid = id->driver_data; + data->rdac_mask = data->max_pos - 1; + + dev_info(dev, "%s %d-Position Digital Potentiometer registered\n", + id->name, data->max_pos); + + return 0; + +exit_free: + kfree(data); + i2c_set_clientdata(client, NULL); +exit: + dev_err(dev, "failed to create client\n"); + return err; +} + +static int __devexit ad525x_remove(struct i2c_client *client) +{ + struct dpot_data *data = i2c_get_clientdata(client); + struct device *dev = &client->dev; + + switch (data->devid) { + case AD5258_ID: + case AD5259_ID: + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + break; + case 
AD5251_ID: + case AD5252_ID: + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC3]); + sysfs_remove_group(&dev->kobj, &ad525x_group_commands); + break; + case AD5253_ID: + case AD5254_ID: + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC2]); + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC3]); + sysfs_remove_group(&dev->kobj, &ad525x_group_commands); + break; + case AD5255_ID: + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC0]); + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC1]); + sysfs_remove_group(&dev->kobj, + &ad525x_group_wipers[AD525X_RDAC2]); + sysfs_remove_group(&dev->kobj, &ad525x_group_commands); + break; + } + + i2c_set_clientdata(client, NULL); + kfree(data); + + return 0; +} + +static const struct i2c_device_id ad525x_idtable[] = { + {"ad5258", AD5258_ID}, + {"ad5259", AD5259_ID}, + {"ad5251", AD5251_ID}, + {"ad5252", AD5252_ID}, + {"ad5253", AD5253_ID}, + {"ad5254", AD5254_ID}, + {"ad5255", AD5255_ID}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, ad525x_idtable); + +static struct i2c_driver ad525x_driver = { + .driver = { + .owner = THIS_MODULE, + .name = DRIVER_NAME, + }, + .id_table = ad525x_idtable, + .probe = ad525x_probe, + .remove = __devexit_p(ad525x_remove), +}; + +static int __init ad525x_init(void) +{ + return i2c_add_driver(&ad525x_driver); +} + +module_init(ad525x_init); + +static void __exit ad525x_exit(void) +{ + i2c_del_driver(&ad525x_driver); +} + +module_exit(ad525x_exit); + +MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>, " + "Michael Hennerich <hennerich@blackfin.uclinux.org>, "); +MODULE_DESCRIPTION("AD5258/9 digital potentiometer driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git 
a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c new file mode 100644 index 00000000000..558bf3f2c27 --- /dev/null +++ b/drivers/misc/atmel-ssc.c @@ -0,0 +1,175 @@ +/* + * Atmel SSC driver + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/platform_device.h> +#include <linux/list.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/atmel-ssc.h> + +/* Serialize access to ssc_list and user count */ +static DEFINE_SPINLOCK(user_lock); +static LIST_HEAD(ssc_list); + +struct ssc_device *ssc_request(unsigned int ssc_num) +{ + int ssc_valid = 0; + struct ssc_device *ssc; + + spin_lock(&user_lock); + list_for_each_entry(ssc, &ssc_list, list) { + if (ssc->pdev->id == ssc_num) { + ssc_valid = 1; + break; + } + } + + if (!ssc_valid) { + spin_unlock(&user_lock); + pr_err("ssc: ssc%d platform device is missing\n", ssc_num); + return ERR_PTR(-ENODEV); + } + + if (ssc->user) { + spin_unlock(&user_lock); + dev_dbg(&ssc->pdev->dev, "module busy\n"); + return ERR_PTR(-EBUSY); + } + ssc->user++; + spin_unlock(&user_lock); + + clk_enable(ssc->clk); + + return ssc; +} +EXPORT_SYMBOL(ssc_request); + +void ssc_free(struct ssc_device *ssc) +{ + spin_lock(&user_lock); + if (ssc->user) { + ssc->user--; + clk_disable(ssc->clk); + } else { + dev_dbg(&ssc->pdev->dev, "device already free\n"); + } + spin_unlock(&user_lock); +} +EXPORT_SYMBOL(ssc_free); + +static int __init ssc_probe(struct platform_device *pdev) +{ + int retval = 0; + struct resource *regs; + struct ssc_device *ssc; + + ssc = kzalloc(sizeof(struct ssc_device), GFP_KERNEL); + if (!ssc) { + dev_dbg(&pdev->dev, "out of memory\n"); + retval = -ENOMEM; + goto out; + } + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) { + 
dev_dbg(&pdev->dev, "no mmio resource defined\n"); + retval = -ENXIO; + goto out_free; + } + + ssc->clk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(ssc->clk)) { + dev_dbg(&pdev->dev, "no pclk clock defined\n"); + retval = -ENXIO; + goto out_free; + } + + ssc->pdev = pdev; + ssc->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!ssc->regs) { + dev_dbg(&pdev->dev, "ioremap failed\n"); + retval = -EINVAL; + goto out_clk; + } + + /* disable all interrupts */ + clk_enable(ssc->clk); + ssc_writel(ssc->regs, IDR, ~0UL); + ssc_readl(ssc->regs, SR); + clk_disable(ssc->clk); + + /* + * platform_get_irq() signals failure with a negative errno, so a + * plain "!irq" test never fires; reject anything that is not a + * positive IRQ number. + */ + retval = platform_get_irq(pdev, 0); + if (retval <= 0) { + dev_dbg(&pdev->dev, "could not get irq\n"); + retval = -ENXIO; + goto out_unmap; + } + ssc->irq = retval; + retval = 0; + + spin_lock(&user_lock); + list_add_tail(&ssc->list, &ssc_list); + spin_unlock(&user_lock); + + platform_set_drvdata(pdev, ssc); + + dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n", + ssc->regs, ssc->irq); + + goto out; + +out_unmap: + iounmap(ssc->regs); +out_clk: + clk_put(ssc->clk); +out_free: + kfree(ssc); +out: + return retval; +} + +/* + * Unlink the device from ssc_list and release what ssc_probe() acquired. + * Only the list manipulation needs user_lock; the unmap, clock release + * and free run after the lock is dropped. + */ +static int __devexit ssc_remove(struct platform_device *pdev) +{ + struct ssc_device *ssc = platform_get_drvdata(pdev); + + spin_lock(&user_lock); + list_del(&ssc->list); + spin_unlock(&user_lock); + + iounmap(ssc->regs); + clk_put(ssc->clk); + kfree(ssc); + + return 0; +} + +static struct platform_driver ssc_driver = { + .remove = __devexit_p(ssc_remove), + .driver = { + .name = "ssc", + .owner = THIS_MODULE, + }, +}; + +static int __init ssc_init(void) +{ + return platform_driver_probe(&ssc_driver, ssc_probe); +} +module_init(ssc_init); + +static void __exit ssc_exit(void) +{ + platform_driver_unregister(&ssc_driver); +} +module_exit(ssc_exit); + +MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>"); +MODULE_DESCRIPTION("SSC driver for Atmel AVR32 and AT91"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ssc"); diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c new file 
mode 100644 index 00000000000..6aa5294dfec --- /dev/null +++ b/drivers/misc/atmel_pwm.c @@ -0,0 +1,409 @@ +#include <linux/module.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/atmel_pwm.h> + + +/* + * This is a simple driver for the PWM controller found in various newer + * Atmel SOCs, including the AVR32 series and the AT91sam9263. + * + * Chips with current Linux ports have only 4 PWM channels, out of max 32. + * AT32UC3A and AT32UC3B chips have 7 channels (but currently no Linux). + * Docs are inconsistent about the width of the channel counter registers; + * it's at least 16 bits, but several places say 20 bits. + */ +#define PWM_NCHAN 4 /* max 32 */ + +struct pwm { + spinlock_t lock; + struct platform_device *pdev; + u32 mask; + int irq; + void __iomem *base; + struct clk *clk; + struct pwm_channel *channel[PWM_NCHAN]; + void (*handler[PWM_NCHAN])(struct pwm_channel *); +}; + + +/* global PWM controller registers */ +#define PWM_MR 0x00 +#define PWM_ENA 0x04 +#define PWM_DIS 0x08 +#define PWM_SR 0x0c +#define PWM_IER 0x10 +#define PWM_IDR 0x14 +#define PWM_IMR 0x18 +#define PWM_ISR 0x1c + +static inline void pwm_writel(const struct pwm *p, unsigned offset, u32 val) +{ + __raw_writel(val, p->base + offset); +} + +static inline u32 pwm_readl(const struct pwm *p, unsigned offset) +{ + return __raw_readl(p->base + offset); +} + +static inline void __iomem *pwmc_regs(const struct pwm *p, int index) +{ + return p->base + 0x200 + index * 0x20; +} + +static struct pwm *pwm; + +static void pwm_dumpregs(struct pwm_channel *ch, char *tag) +{ + struct device *dev = &pwm->pdev->dev; + + dev_dbg(dev, "%s: mr %08x, sr %08x, imr %08x\n", + tag, + pwm_readl(pwm, PWM_MR), + pwm_readl(pwm, PWM_SR), + pwm_readl(pwm, PWM_IMR)); + dev_dbg(dev, + "pwm ch%d - mr %08x, dty %u, prd %u, cnt %u\n", + ch->index, + pwm_channel_readl(ch, PWM_CMR), + pwm_channel_readl(ch, PWM_CDTY), 
+ pwm_channel_readl(ch, PWM_CPRD), + pwm_channel_readl(ch, PWM_CCNT)); +} + + +/** + * pwm_channel_alloc - allocate an unused PWM channel + * @index: identifies the channel + * @ch: structure to be initialized + * + * Drivers allocate PWM channels according to the board's wiring, and + * matching board-specific setup code. Returns zero or negative errno: + * -ENODEV if the controller is not set up or @index is not in its mask, + * -EINVAL for an out-of-range @index or a NULL @ch, -EBUSY if the channel + * is already allocated. + */ +int pwm_channel_alloc(int index, struct pwm_channel *ch) +{ + unsigned long flags; + int status = 0; + + /* insist on PWM init ... */ + if (!pwm) + return -ENODEV; + + /* range-check index before it is used as a shift count below */ + if (index < 0 || index >= PWM_NCHAN || !ch) + return -EINVAL; + + /* ... with this signal pinned out */ + if (!(pwm->mask & (1 << index))) + return -ENODEV; + + memset(ch, 0, sizeof *ch); + + spin_lock_irqsave(&pwm->lock, flags); + if (pwm->channel[index]) + status = -EBUSY; + else { + clk_enable(pwm->clk); + + ch->regs = pwmc_regs(pwm, index); + ch->index = index; + + /* REVISIT: ap7000 seems to go 2x as fast as we expect!! */ + ch->mck = clk_get_rate(pwm->clk); + + pwm->channel[index] = ch; + pwm->handler[index] = NULL; + + /* channel and irq are always disabled when we return */ + pwm_writel(pwm, PWM_DIS, 1 << index); + pwm_writel(pwm, PWM_IDR, 1 << index); + } + spin_unlock_irqrestore(&pwm->lock, flags); + return status; +} +EXPORT_SYMBOL(pwm_channel_alloc); + +static int pwmcheck(struct pwm_channel *ch) +{ + int index; + + if (!pwm) + return -ENODEV; + if (!ch) + return -EINVAL; + index = ch->index; + if (index < 0 || index >= PWM_NCHAN || pwm->channel[index] != ch) + return -EINVAL; + + return index; +} + +/** + * pwm_channel_free - release a previously allocated channel + * @ch: the channel being released + * + * The channel is completely shut down (counter and IRQ disabled), + * and made available for re-use. Returns zero, or negative errno. 
+ */ +int pwm_channel_free(struct pwm_channel *ch) +{ + unsigned long flags; + int t; + + /* pwmcheck() only runs once pwm->lock is held, so a missing + * controller has to be rejected before the lock is touched */ + if (!pwm) + return -ENODEV; + + spin_lock_irqsave(&pwm->lock, flags); + t = pwmcheck(ch); + if (t >= 0) { + pwm->channel[t] = NULL; + pwm->handler[t] = NULL; + + /* channel and irq are always disabled when we return */ + pwm_writel(pwm, PWM_DIS, 1 << t); + pwm_writel(pwm, PWM_IDR, 1 << t); + + clk_disable(pwm->clk); + t = 0; + } + spin_unlock_irqrestore(&pwm->lock, flags); + return t; +} +EXPORT_SYMBOL(pwm_channel_free); + +/** + * __pwm_channel_onoff - enable or disable an allocated channel + * @ch: the channel + * @enabled: nonzero writes the channel's bit to PWM_ENA, zero to PWM_DIS + * + * Returns zero, or negative errno. + */ +int __pwm_channel_onoff(struct pwm_channel *ch, int enabled) +{ + unsigned long flags; + int t; + + /* OMITTED FUNCTIONALITY: starting several channels in synch */ + + /* reject a missing controller before dereferencing it for the lock */ + if (!pwm) + return -ENODEV; + + spin_lock_irqsave(&pwm->lock, flags); + t = pwmcheck(ch); + if (t >= 0) { + pwm_writel(pwm, enabled ? PWM_ENA : PWM_DIS, 1 << t); + t = 0; + pwm_dumpregs(ch, enabled ? "enable" : "disable"); + } + spin_unlock_irqrestore(&pwm->lock, flags); + + return t; +} +EXPORT_SYMBOL(__pwm_channel_onoff); + +/** + * pwm_clk_alloc - allocate and configure CLKA or CLKB + * @prescale: from 0..10, the power of two used to divide MCK + * @div: from 1..255, the linear divisor to use + * + * Returns PWM_CPR_CLKA, PWM_CPR_CLKB, or negative errno. The allocated + * clock will run with a period of (2^prescale * div) / MCK, or twice as + * long if center aligned PWM output is used. The clock must later be + * deconfigured using pwm_clk_free(). 
+ */ +int pwm_clk_alloc(unsigned prescale, unsigned div) +{ + unsigned long flags; + u32 mr; + u32 val = (prescale << 8) | div; + int ret = -EBUSY; + + if (prescale >= 10 || div == 0 || div > 255) + return -EINVAL; + + spin_lock_irqsave(&pwm->lock, flags); + mr = pwm_readl(pwm, PWM_MR); + if ((mr & 0xffff) == 0) { + mr |= val; + ret = PWM_CPR_CLKA; + } else if ((mr & (0xffff << 16)) == 0) { + mr |= val << 16; + ret = PWM_CPR_CLKB; + } + if (ret > 0) + pwm_writel(pwm, PWM_MR, mr); + spin_unlock_irqrestore(&pwm->lock, flags); + return ret; +} +EXPORT_SYMBOL(pwm_clk_alloc); + +/** + * pwm_clk_free - deconfigure and release CLKA or CLKB + * + * Reverses the effect of pwm_clk_alloc(). + */ +void pwm_clk_free(unsigned clk) +{ + unsigned long flags; + u32 mr; + + spin_lock_irqsave(&pwm->lock, flags); + mr = pwm_readl(pwm, PWM_MR); + if (clk == PWM_CPR_CLKA) + pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 0)); + if (clk == PWM_CPR_CLKB) + pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 16)); + spin_unlock_irqrestore(&pwm->lock, flags); +} +EXPORT_SYMBOL(pwm_clk_free); + +/** + * pwm_channel_handler - manage channel's IRQ handler + * @ch: the channel + * @handler: the handler to use, possibly NULL + * + * If the handler is non-null, the handler will be called after every + * period of this PWM channel. If the handler is null, this channel + * won't generate an IRQ. + */ +int pwm_channel_handler(struct pwm_channel *ch, + void (*handler)(struct pwm_channel *ch)) +{ + unsigned long flags; + int t; + + spin_lock_irqsave(&pwm->lock, flags); + t = pwmcheck(ch); + if (t >= 0) { + pwm->handler[t] = handler; + pwm_writel(pwm, handler ? 
PWM_IER : PWM_IDR, 1 << t); + t = 0; + } + spin_unlock_irqrestore(&pwm->lock, flags); + + return t; +} +EXPORT_SYMBOL(pwm_channel_handler); + +static irqreturn_t pwm_irq(int id, void *_pwm) +{ + struct pwm *p = _pwm; + irqreturn_t handled = IRQ_NONE; + u32 irqstat; + int index; + + spin_lock(&p->lock); + + /* ack irqs, then handle them */ + irqstat = pwm_readl(pwm, PWM_ISR); + + while (irqstat) { + struct pwm_channel *ch; + void (*handler)(struct pwm_channel *ch); + + index = ffs(irqstat) - 1; + irqstat &= ~(1 << index); + ch = pwm->channel[index]; + handler = pwm->handler[index]; + if (handler && ch) { + spin_unlock(&p->lock); + handler(ch); + spin_lock(&p->lock); + handled = IRQ_HANDLED; + } + } + + spin_unlock(&p->lock); + return handled; +} + +static int __init pwm_probe(struct platform_device *pdev) +{ + struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + u32 *mp = pdev->dev.platform_data; + struct pwm *p; + int status = -EIO; + + if (pwm) + return -EBUSY; + if (!r || irq < 0 || !mp || !*mp) + return -ENODEV; + if (*mp & ~((1<<PWM_NCHAN)-1)) { + dev_warn(&pdev->dev, "mask 0x%x ... 
more than %d channels\n", + *mp, PWM_NCHAN); + return -EINVAL; + } + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + spin_lock_init(&p->lock); + p->pdev = pdev; + p->mask = *mp; + p->irq = irq; + p->base = ioremap(r->start, r->end - r->start + 1); + if (!p->base) + goto fail; + p->clk = clk_get(&pdev->dev, "pwm_clk"); + if (IS_ERR(p->clk)) { + status = PTR_ERR(p->clk); + p->clk = NULL; + goto fail; + } + + status = request_irq(irq, pwm_irq, 0, pdev->name, p); + if (status < 0) + goto fail; + + pwm = p; + platform_set_drvdata(pdev, p); + + return 0; + +fail: + if (p->clk) + clk_put(p->clk); + if (p->base) + iounmap(p->base); + + kfree(p); + return status; +} + +static int __exit pwm_remove(struct platform_device *pdev) +{ + struct pwm *p = platform_get_drvdata(pdev); + + if (p != pwm) + return -EINVAL; + + clk_enable(pwm->clk); + pwm_writel(pwm, PWM_DIS, (1 << PWM_NCHAN) - 1); + pwm_writel(pwm, PWM_IDR, (1 << PWM_NCHAN) - 1); + clk_disable(pwm->clk); + + pwm = NULL; + + free_irq(p->irq, p); + clk_put(p->clk); + iounmap(p->base); + kfree(p); + + return 0; +} + +static struct platform_driver atmel_pwm_driver = { + .driver = { + .name = "atmel_pwm", + .owner = THIS_MODULE, + }, + .remove = __exit_p(pwm_remove), + + /* NOTE: PWM can keep running in AVR32 "idle" and "frozen" states; + * and all AT91sam9263 states, albeit at reduced clock rate if + * MCK becomes the slow clock (i.e. what Linux labels STR). 
+ */ +}; + +static int __init pwm_init(void) +{ + return platform_driver_probe(&atmel_pwm_driver, pwm_probe); +} +module_init(pwm_init); + +static void __exit pwm_exit(void) +{ + platform_driver_unregister(&atmel_pwm_driver); +} +module_exit(pwm_exit); + +MODULE_DESCRIPTION("Driver for AT32/AT91 PWM module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:atmel_pwm"); diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c new file mode 100644 index 00000000000..05dc8a31f28 --- /dev/null +++ b/drivers/misc/atmel_tclib.c @@ -0,0 +1,161 @@ +#include <linux/atmel_tc.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> + +/* Number of bytes to reserve for the iomem resource */ +#define ATMEL_TC_IOMEM_SIZE 256 + + +/* + * This is a thin library to solve the problem of how to portably allocate + * one of the TC blocks. For simplicity, it doesn't currently expect to + * share individual timers between different drivers. + */ + +#if defined(CONFIG_AVR32) +/* AVR32 has these divide PBB */ +const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, }; +EXPORT_SYMBOL(atmel_tc_divisors); + +#elif defined(CONFIG_ARCH_AT91) +/* AT91 has these divide MCK */ +const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, }; +EXPORT_SYMBOL(atmel_tc_divisors); + +#endif + +static DEFINE_SPINLOCK(tc_list_lock); +static LIST_HEAD(tc_list); + +/** + * atmel_tc_alloc - allocate a specified TC block + * @block: which block to allocate + * @name: name to be associated with the iomem resource + * + * Caller allocates a block. If it is available, a pointer to a + * pre-initialized struct atmel_tc is returned. The caller can access + * the registers directly through the "regs" field. 
+ */ +struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name) +{ + struct atmel_tc *tc; + struct platform_device *pdev = NULL; + struct resource *r; + + spin_lock(&tc_list_lock); + list_for_each_entry(tc, &tc_list, node) { + if (tc->pdev->id == block) { + pdev = tc->pdev; + break; + } + } + + /* no such block, or it is already claimed (iomem set) */ + if (!pdev || tc->iomem) + goto fail; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) + goto fail; + + r = request_mem_region(r->start, ATMEL_TC_IOMEM_SIZE, name); + if (!r) + goto fail; + + tc->regs = ioremap(r->start, ATMEL_TC_IOMEM_SIZE); + if (!tc->regs) + goto fail_ioremap; + + tc->iomem = r; + +out: + spin_unlock(&tc_list_lock); + return tc; + +fail_ioremap: + /* pair request_mem_region() with release_mem_region() so the + * region's resource is freed, not merely unlinked */ + release_mem_region(r->start, ATMEL_TC_IOMEM_SIZE); +fail: + tc = NULL; + goto out; +} +EXPORT_SYMBOL_GPL(atmel_tc_alloc); + +/** + * atmel_tc_free - release a specified TC block + * @tc: Timer/counter block that was returned by atmel_tc_alloc() + * + * This reverses the effect of atmel_tc_alloc(), unmapping the I/O + * registers, invalidating the resource returned by that routine and + * making the TC available to other drivers. 
+ */ +void atmel_tc_free(struct atmel_tc *tc) +{ + spin_lock(&tc_list_lock); + if (tc->regs) { + iounmap(tc->regs); + /* the region came from request_mem_region(), so it must go + * back through release_mem_region() to free its resource */ + release_mem_region(tc->iomem->start, ATMEL_TC_IOMEM_SIZE); + tc->regs = NULL; + tc->iomem = NULL; + } + spin_unlock(&tc_list_lock); +} +EXPORT_SYMBOL_GPL(atmel_tc_free); + +static int __init tc_probe(struct platform_device *pdev) +{ + struct atmel_tc *tc; + struct clk *clk; + int irq; + + if (!platform_get_resource(pdev, IORESOURCE_MEM, 0)) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -EINVAL; + + tc = kzalloc(sizeof(struct atmel_tc), GFP_KERNEL); + if (!tc) + return -ENOMEM; + + tc->pdev = pdev; + + clk = clk_get(&pdev->dev, "t0_clk"); + if (IS_ERR(clk)) { + kfree(tc); + return -EINVAL; + } + + tc->clk[0] = clk; + tc->clk[1] = clk_get(&pdev->dev, "t1_clk"); + if (IS_ERR(tc->clk[1])) + tc->clk[1] = clk; + tc->clk[2] = clk_get(&pdev->dev, "t2_clk"); + if (IS_ERR(tc->clk[2])) + tc->clk[2] = clk; + + tc->irq[0] = irq; + tc->irq[1] = platform_get_irq(pdev, 1); + if (tc->irq[1] < 0) + tc->irq[1] = irq; + tc->irq[2] = platform_get_irq(pdev, 2); + if (tc->irq[2] < 0) + tc->irq[2] = irq; + + spin_lock(&tc_list_lock); + list_add_tail(&tc->node, &tc_list); + spin_unlock(&tc_list_lock); + + return 0; +} + +static struct platform_driver tc_driver = { + .driver.name = "atmel_tcb", +}; + +static int __init tc_init(void) +{ + return platform_driver_probe(&tc_driver, tc_probe); +} +arch_initcall(tc_init); diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig new file mode 100644 index 00000000000..e46af9a5810 --- /dev/null +++ b/drivers/misc/c2port/Kconfig @@ -0,0 +1,35 @@ +# +# C2 port devices +# + +menuconfig C2PORT + tristate "Silicon Labs C2 port support (EXPERIMENTAL)" + depends on EXPERIMENTAL + default no + help + This option enables support for Silicon Labs C2 port used to + program Silicon micro controller chips (and other 8051 compatible). + + If your board have no such micro controllers you don't need this + interface at all. 
+ + To compile this driver as a module, choose M here: the module will + be called c2port_core. Note that you also need a client module + usually called c2port-*. + + If you are not sure, say N here. + +if C2PORT + +config C2PORT_DURAMAR_2150 + tristate "C2 port support for Eurotech's Duramar 2150 (EXPERIMENTAL)" + depends on X86 && C2PORT + default no + help + This option enables C2 support for the Eurotech's Duramar 2150 + on board micro controller. + + To compile this driver as a module, choose M here: the module will + be called c2port-duramar2150. + +endif # C2PORT diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile new file mode 100644 index 00000000000..3b2cf43d60f --- /dev/null +++ b/drivers/misc/c2port/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_C2PORT) += core.o + +obj-$(CONFIG_C2PORT_DURAMAR_2150) += c2port-duramar2150.o diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c new file mode 100644 index 00000000000..338dcc12150 --- /dev/null +++ b/drivers/misc/c2port/c2port-duramar2150.c @@ -0,0 +1,158 @@ +/* + * Silicon Labs C2 port Linux support for Eurotech Duramar 2150 + * + * Copyright (c) 2008 Rodolfo Giometti <giometti@linux.it> + * Copyright (c) 2008 Eurotech S.p.A. 
<info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/c2port.h> + +#define DATA_PORT 0x325 +#define DIR_PORT 0x326 +#define C2D (1 << 0) +#define C2CK (1 << 1) + +static DEFINE_MUTEX(update_lock); + +/* + * C2 port operations + */ + +static void duramar2150_c2port_access(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + /* 0 = input, 1 = output */ + if (status) + outb(v | (C2D | C2CK), DIR_PORT); + else + /* When access is "off" is important that both lines are set + * as inputs or hi-impedence */ + outb(v & ~(C2D | C2CK), DIR_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + if (dir) + outb(v & ~C2D, DIR_PORT); + else + outb(v | C2D, DIR_PORT); + + mutex_unlock(&update_lock); +} + +static int duramar2150_c2port_c2d_get(struct c2port_device *dev) +{ + return inb(DATA_PORT) & C2D; +} + +static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2D, DATA_PORT); + else + outb(v & ~C2D, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2CK, DATA_PORT); + else + outb(v & ~C2CK, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static struct c2port_ops duramar2150_c2port_ops = { + .block_size = 512, /* bytes */ + .blocks_num = 30, /* total flash size: 15360 bytes */ + + 
.access = duramar2150_c2port_access, + .c2d_dir = duramar2150_c2port_c2d_dir, + .c2d_get = duramar2150_c2port_c2d_get, + .c2d_set = duramar2150_c2port_c2d_set, + .c2ck_set = duramar2150_c2port_c2ck_set, +}; + +static struct c2port_device *duramar2150_c2port_dev; + +/* + * Module stuff + */ + +static int __init duramar2150_c2port_init(void) +{ + struct resource *res; + int ret = 0; + + res = request_region(0x325, 2, "c2port"); + if (!res) + return -EBUSY; + + duramar2150_c2port_dev = c2port_device_register("uc", + &duramar2150_c2port_ops, NULL); + if (!duramar2150_c2port_dev) { + ret = -ENODEV; + goto free_region; + } + + return 0; + +free_region: + release_region(0x325, 2); + return ret; +} + +static void __exit duramar2150_c2port_exit(void) +{ + /* Setup the GPIOs as input by default (access = 0) */ + duramar2150_c2port_access(duramar2150_c2port_dev, 0); + + c2port_device_unregister(duramar2150_c2port_dev); + + release_region(0x325, 2); +} + +module_init(duramar2150_c2port_init); +module_exit(duramar2150_c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>"); +MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c new file mode 100644 index 00000000000..b5346b4db91 --- /dev/null +++ b/drivers/misc/c2port/core.c @@ -0,0 +1,1005 @@ +/* + * Silicon Labs C2 port core Linux support + * + * Copyright (c) 2007 Rodolfo Giometti <giometti@linux.it> + * Copyright (c) 2007 Eurotech S.p.A. 
<info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/kmemcheck.h> +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/idr.h> +#include <linux/sched.h> + +#include <linux/c2port.h> + +#define DRIVER_NAME "c2port" +#define DRIVER_VERSION "0.51.0" + +static DEFINE_SPINLOCK(c2port_idr_lock); +static DEFINE_IDR(c2port_idr); + +/* + * Local variables + */ + +static struct class *c2port_class; + +/* + * C2 registers & commands defines + */ + +/* C2 registers */ +#define C2PORT_DEVICEID 0x00 +#define C2PORT_REVID 0x01 +#define C2PORT_FPCTL 0x02 +#define C2PORT_FPDAT 0xB4 + +/* C2 interface commands */ +#define C2PORT_GET_VERSION 0x01 +#define C2PORT_DEVICE_ERASE 0x03 +#define C2PORT_BLOCK_READ 0x06 +#define C2PORT_BLOCK_WRITE 0x07 +#define C2PORT_PAGE_ERASE 0x08 + +/* C2 status return codes */ +#define C2PORT_INVALID_COMMAND 0x00 +#define C2PORT_COMMAND_FAILED 0x02 +#define C2PORT_COMMAND_OK 0x0d + +/* + * C2 port low level signal managements + */ + +static void c2port_reset(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* To reset the device we have to keep clock line low for at least + * 20us. + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(25); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +static void c2port_strobe_ck(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* During hi-low-hi transition we disable local IRQs to avoid + * interructions since C2 port specification says that it must be + * shorter than 5us, otherwise the microcontroller may consider + * it as a reset signal! 
+ */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(1); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +/* + * C2 port basic functions + */ + +static void c2port_write_ar(struct c2port_device *dev, u8 addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (11b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, addr & 0x01); + c2port_strobe_ck(dev); + + addr >>= 1; + } + + /* STOP field */ + ops->c2d_dir(dev, 1); + c2port_strobe_ck(dev); +} + +static int c2port_read_ar(struct c2port_device *dev, u8 *addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (10b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + ops->c2d_dir(dev, 1); + *addr = 0; + for (i = 0; i < 8; i++) { + *addr >>= 1; /* shift in 8-bit ADDRESS field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *addr |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_write_dr(struct c2port_device *dev, u8 data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (01b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* DATA field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, data & 0x01); + c2port_strobe_ck(dev); + + data >>= 1; + } + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + 
c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_read_dr(struct c2port_device *dev, u8 *data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (00b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* DATA field */ + *data = 0; + for (i = 0; i < 8; i++) { + *data >>= 1; /* shift in 8-bit DATA field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *data |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_poll_in_busy(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 20; + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (!(addr & 0x02)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +static int c2port_poll_out_ready(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 10000; /* erase flash needs long time... 
*/ + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (addr & 0x01) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +/* + * sysfs methods + */ + +static ssize_t c2port_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", c2dev->name); +} + +static ssize_t c2port_show_flash_blocks_num(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num); +} + +static ssize_t c2port_show_flash_block_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->block_size); +} + +static ssize_t c2port_show_flash_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size); +} + +static ssize_t c2port_show_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->access); +} + +static ssize_t c2port_store_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + int status, ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + + c2dev->access = !!status; + + /* If access is "on" clock should be HIGH _before_ setting the line + * as output and data line should be set as INPUT anyway */ + if (c2dev->access) + ops->c2ck_set(c2dev, 1); + 
ops->access(c2dev, c2dev->access); + if (c2dev->access) + ops->c2d_dir(c2dev, 1); + + mutex_unlock(&c2dev->mutex); + + return count; +} + +static ssize_t c2port_store_reset(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + + c2port_reset(c2dev); + c2dev->flash_access = 0; + + mutex_unlock(&c2dev->mutex); + + return count; +} + +static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select DEVICEID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_DEVICEID); + + /* Read and return the device ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_dev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_dev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(dev, "cannot read from %s\n", c2dev->name); + + return ret; +} + +static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select REVID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_REVID); + + /* Read and return the revision ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_rev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = 
__c2port_show_rev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name); + + return ret; +} + +static ssize_t c2port_show_flash_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->flash_access); +} + +static ssize_t __c2port_store_flash_access(struct c2port_device *dev, + int status) +{ + int ret; + + /* Check the device access status */ + if (!dev->access) + return -EBUSY; + + dev->flash_access = !!status; + + /* If flash_access is off we have nothing to do... */ + if (dev->flash_access == 0) + return 0; + + /* Target the C2 flash programming control register for C2 data + * register access */ + c2port_write_ar(dev, C2PORT_FPCTL); + + /* Write the first keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x02); + if (ret < 0) + return ret; + + /* Write the second keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x01); + if (ret < 0) + return ret; + + /* Delay for at least 20ms to ensure the target is ready for + * C2 flash programming */ + mdelay(25); + + return 0; +} + +static ssize_t c2port_store_flash_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int status; + ssize_t ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + ret = __c2port_store_flash_access(c2dev, status); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot enable %s flash programming\n", + c2dev->name); + return ret; + } + + return count; +} + +static ssize_t __c2port_write_flash_erase(struct c2port_device *dev) +{ + u8 status; + int ret; + + /* Target the C2 flash programming data register for C2 data register + * access. 
+ */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send device erase command */ + c2port_write_dr(dev, C2PORT_DEVICE_ERASE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send a three-byte arming sequence to enable the device erase. + * If the sequence is not received correctly, the command will be + * ignored. + * Sequence is: 0xde, 0xad, 0xa5. + */ + c2port_write_dr(dev, 0xde); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xad); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xa5); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return 0; +} + +static ssize_t c2port_store_flash_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_erase(c2dev); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name); + return ret; + } + + return count; +} + +static ssize_t __c2port_read_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nread = 128; + int i, ret; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return 0; + + if (ops->block_size * ops->blocks_num - offset < 
nread) + nread = ops->block_size * ops->blocks_num - offset; + if (count < nread) + nread = count; + if (nread == 0) + return nread; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block read command; stop if the write itself fails */ + ret = c2port_write_dr(dev, C2PORT_BLOCK_READ); + if (ret < 0) + return ret; + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + ret = c2port_write_dr(dev, offset >> 8); + if (ret < 0) + return ret; + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + ret = c2port_write_dr(dev, offset & 0x00ff); + if (ret < 0) + return ret; + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + ret = c2port_write_dr(dev, nread); + if (ret < 0) + return ret; + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before reading FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Read flash block */ + for (i = 0; i < nread; i++) { + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + ret = c2port_read_dr(dev, buffer+i); + if (ret < 0) + return ret; + } + + return nread; +} +/* + * Binary attribute "flash_data" read handler: -EBUSY unless both C2 port + * access and flash access are enabled; otherwise runs + * __c2port_read_flash_data() under c2dev->mutex and logs a failure. + * Returns the helper's result. + */ +static ssize_t c2port_read_flash_data(struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = + dev_get_drvdata(container_of(kobj, + struct device, kobj)); + ssize_t ret; + + /* Check the 
device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_read_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name); + + return ret; +} +/* + * Write up to 128 bytes from buffer to target flash starting at offset, + * clamped to count and to the end of flash (block_size * blocks_num). + * Returns the number of bytes written, -EINVAL when offset is at or past + * the end of flash, -EBUSY when the programming interface status is not + * C2PORT_COMMAND_OK, or a negative value from the C2 access helpers. + */ +static ssize_t __c2port_write_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nwrite = 128; + int i, ret; + + if (nwrite > count) + nwrite = count; + if (ops->block_size * ops->blocks_num - offset < nwrite) + nwrite = ops->block_size * ops->blocks_num - offset; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return -EINVAL; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block write command; stop if the write itself fails */ + ret = c2port_write_dr(dev, C2PORT_BLOCK_WRITE); + if (ret < 0) + return ret; + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + ret = c2port_write_dr(dev, offset >> 8); + if (ret < 0) + return ret; + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + ret = c2port_write_dr(dev, offset & 0x00ff); + if (ret < 0) + return ret; + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + ret = c2port_write_dr(dev, nwrite); + if (ret < 0) + return ret; + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before writing FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash 
programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Write flash block */ + for (i = 0; i < nwrite; i++) { + ret = c2port_write_dr(dev, *(buffer+i)); + if (ret < 0) + return ret; + + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + } + + /* Wait for last flash write to complete */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return nwrite; +} +/* + * Binary attribute "flash_data" write handler: -EBUSY unless both C2 port + * access and flash access are enabled; otherwise runs + * __c2port_write_flash_data() under c2dev->mutex and logs a failure. + * Returns the helper's result. + */ +static ssize_t c2port_write_flash_data(struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = + dev_get_drvdata(container_of(kobj, + struct device, kobj)); + int ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name); + + return ret; +} + +/* + * Class attributes (installed as c2port_class->dev_attrs by c2port_init()) + */ + +static struct device_attribute c2port_attrs[] = { + __ATTR(name, 0444, c2port_show_name, NULL), + __ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL), + __ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL), + __ATTR(flash_size, 0444, c2port_show_flash_size, NULL), + __ATTR(access, 0644, c2port_show_access, c2port_store_access), + __ATTR(reset, 0200, NULL, c2port_store_reset), + __ATTR(dev_id, 0444, c2port_show_dev_id, NULL), + __ATTR(rev_id, 0444, c2port_show_rev_id, NULL), + + __ATTR(flash_access, 0644, c2port_show_flash_access, + c2port_store_flash_access), + __ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase), + __ATTR_NULL, +}; +/* + * Binary "flash_data" file created per device by c2port_device_register(). + * The single static instance is shared: register overwrites .size on every + * call. + */ +static struct bin_attribute c2port_bin_attrs = { + .attr = { + .name = "flash_data", + .mode = 0644 + }, + .read = c2port_read_flash_data, + .write = c2port_write_flash_data, + /* .size is computed at run-time */ +}; + 
+/* + * Exported functions + */ + +struct c2port_device *c2port_device_register(char *name, + struct c2port_ops *ops, void *devdata) +{ + struct c2port_device *c2dev; + int id, ret; + + if (unlikely(!ops) || unlikely(!ops->access) || \ + unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ + unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set)) + return ERR_PTR(-EINVAL); + + c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); + kmemcheck_annotate_bitfield(c2dev, flags); + if (unlikely(!c2dev)) + return ERR_PTR(-ENOMEM); + + ret = idr_pre_get(&c2port_idr, GFP_KERNEL); + if (!ret) { + ret = -ENOMEM; + goto error_idr_get_new; + } + + spin_lock_irq(&c2port_idr_lock); + ret = idr_get_new(&c2port_idr, c2dev, &id); + spin_unlock_irq(&c2port_idr_lock); + + if (ret < 0) + goto error_idr_get_new; + c2dev->id = id; + + c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, + "c2port%d", id); + if (unlikely(!c2dev->dev)) { + ret = -ENOMEM; + goto error_device_create; + } + dev_set_drvdata(c2dev->dev, c2dev); + + strncpy(c2dev->name, name, C2PORT_NAME_LEN); + c2dev->ops = ops; + mutex_init(&c2dev->mutex); + + /* Create binary file */ + c2port_bin_attrs.size = ops->blocks_num * ops->block_size; + ret = device_create_bin_file(c2dev->dev, &c2port_bin_attrs); + if (unlikely(ret)) + goto error_device_create_bin_file; + + /* By default C2 port access is off */ + c2dev->access = c2dev->flash_access = 0; + ops->access(c2dev, 0); + + dev_info(c2dev->dev, "C2 port %s added\n", name); + dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes " + "(%d bytes total)\n", + name, ops->blocks_num, ops->block_size, + ops->blocks_num * ops->block_size); + + return c2dev; + +error_device_create_bin_file: + device_destroy(c2port_class, 0); + +error_device_create: + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, id); + spin_unlock_irq(&c2port_idr_lock); + +error_idr_get_new: + kfree(c2dev); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL(c2port_device_register); + +void 
c2port_device_unregister(struct c2port_device *c2dev) +{ + if (!c2dev) + return; + + dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name); + + device_remove_bin_file(c2dev->dev, &c2port_bin_attrs); + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + + device_destroy(c2port_class, c2dev->id); + + kfree(c2dev); +} +EXPORT_SYMBOL(c2port_device_unregister); + +/* + * Module stuff + */ + +static int __init c2port_init(void) +{ + printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION + " - (C) 2007 Rodolfo Giometti\n"); + + c2port_class = class_create(THIS_MODULE, "c2port"); + if (!c2port_class) { + printk(KERN_ERR "c2port: failed to allocate class\n"); + return -ENOMEM; + } + c2port_class->dev_attrs = c2port_attrs; + + return 0; +} + +static void __exit c2port_exit(void) +{ + class_destroy(c2port_class); +} + +module_init(c2port_init); +module_exit(c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>"); +MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig new file mode 100644 index 00000000000..22429b8b106 --- /dev/null +++ b/drivers/misc/cb710/Kconfig @@ -0,0 +1,25 @@ +config CB710_CORE + tristate "ENE CB710/720 Flash memory card reader support" + depends on PCI + help + This option enables support for PCI ENE CB710/720 Flash memory card + reader found in some laptops (ie. some versions of HP Compaq nx9500). + + You will also have to select some flash card format drivers (MMC/SD, + MemoryStick). + + This driver can also be built as a module. If so, the module + will be called cb710. + +config CB710_DEBUG + bool "Enable driver debugging" + depends on CB710_CORE != n + default n + help + This is an option for use by developers; most people should + say N here. This adds a lot of debugging output to dmesg. 
+ +config CB710_DEBUG_ASSUMPTIONS + bool + depends on CB710_CORE != n + default y diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile new file mode 100644 index 00000000000..7b80cbf1a60 --- /dev/null +++ b/drivers/misc/cb710/Makefile @@ -0,0 +1,8 @@ +ifeq ($(CONFIG_CB710_DEBUG),y) + EXTRA_CFLAGS += -DDEBUG +endif + +obj-$(CONFIG_CB710_CORE) += cb710.o + +cb710-y := core.o sgbuf2.o +cb710-$(CONFIG_CB710_DEBUG) += debug.o diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c new file mode 100644 index 00000000000..b14eab0f2ba --- /dev/null +++ b/drivers/misc/cb710/core.c @@ -0,0 +1,357 @@ +/* + * cb710/core.c + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/idr.h> +#include <linux/cb710.h> + +static DEFINE_IDA(cb710_ida); +static DEFINE_SPINLOCK(cb710_ida_lock); +/* + * Read-modify-write one PCI config dword of pdev: + * new value = (old value & mask) ^ xor. + */ +void cb710_pci_update_config_reg(struct pci_dev *pdev, + int reg, uint32_t mask, uint32_t xor) +{ + u32 rval; + + pci_read_config_dword(pdev, reg, &rval); + rval = (rval & mask) ^ xor; + pci_write_config_dword(pdev, reg, rval); +} +EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg); + +/* + * Some magic writes based on Windows driver init code. + * + * The function-0 device of the same slot is looked up only after the early + * return below, so the reference taken by pci_get_slot() is never left + * behind on that path. Returns 0 on success, -ENODEV if function 0 is + * not found. + */ +static int __devinit cb710_pci_configure(struct pci_dev *pdev) +{ + unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + struct pci_dev *pdev0; + u32 val; + + cb710_pci_update_config_reg(pdev, 0x48, + ~0x000000FF, 0x0000003F); + + pci_read_config_dword(pdev, 0x48, &val); + if (val & 0x80000000) + return 0; + + pdev0 = pci_get_slot(pdev->bus, devfn); + if (!pdev0) + return -ENODEV; + + if (pdev0->vendor == PCI_VENDOR_ID_ENE + && pdev0->device == PCI_DEVICE_ID_ENE_720) { + 
cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00F00000, 0x00100000); + cb710_pci_update_config_reg(pdev0, 0xB0, + ~0x08000000, 0x08000000); + } + + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00000F00, 0x00000200); + cb710_pci_update_config_reg(pdev0, 0x90, + ~0x00060000, 0x00040000); + + pci_dev_put(pdev0); + + return 0; +} +/* + * Interrupt handler for the chip: with chip->irq_lock held, calls the + * irq_handler of each of the chip->slots registered slots that has one. + * Returns IRQ_HANDLED if any slot handler returned non-zero, else IRQ_NONE. + */ +static irqreturn_t cb710_irq_handler(int irq, void *data) +{ + struct cb710_chip *chip = data; + struct cb710_slot *slot = &chip->slot[0]; + irqreturn_t handled = IRQ_NONE; + unsigned nr; + + spin_lock(&chip->irq_lock); /* incl. smp_rmb() */ + + for (nr = chip->slots; nr; ++slot, --nr) { + cb710_irq_handler_t handler_func = slot->irq_handler; + if (handler_func && handler_func(slot)) + handled = IRQ_HANDLED; + } + + spin_unlock(&chip->irq_lock); + + return handled; +} +/* + * Device release callback installed on each slot's platform device. With + * CONFIG_CB710_DEBUG_ASSUMPTIONS it decrements chip->slot_refs_count; + * otherwise it does nothing. + */ +static void cb710_release_slot(struct device *dev) +{ +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev)); + struct cb710_chip *chip = cb710_slot_to_chip(slot); + + /* slot struct can be freed now */ + atomic_dec(&chip->slot_refs_count); +#endif +} +/* + * Fill in the next free entry of chip->slot[] and register it as a platform + * device called name with id chip->platform_id. On success slot_mask is + * OR-ed into chip->slot_mask and 0 is returned; on failure the slot count + * is restored and the platform_device_register() error is returned. + */ +static int __devinit cb710_register_slot(struct cb710_chip *chip, + unsigned slot_mask, unsigned io_offset, const char *name) +{ + int nr = chip->slots; + struct cb710_slot *slot = &chip->slot[nr]; + int err; + + dev_dbg(cb710_chip_dev(chip), + "register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n", + name, chip->platform_id, nr, slot_mask, io_offset); + + /* slot->irq_handler == NULL here; this needs to be + * seen before platform_device_register() */ + ++chip->slots; + smp_wmb(); + + slot->iobase = chip->iobase + io_offset; + slot->pdev.name = name; + slot->pdev.id = chip->platform_id; + slot->pdev.dev.parent = &chip->pdev->dev; + slot->pdev.dev.release = cb710_release_slot; + + err = platform_device_register(&slot->pdev); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + atomic_inc(&chip->slot_refs_count); +#endif + + if (err) { + /* device_initialize() called from 
platform_device_register() + * wants this on error path */ + platform_device_put(&slot->pdev); + + /* slot->irq_handler == NULL here anyway, so no lock needed */ + --chip->slots; + return err; + } + + chip->slot_mask |= slot_mask; + + return 0; +} + +static void cb710_unregister_slot(struct cb710_chip *chip, + unsigned slot_mask) +{ + int nr = chip->slots - 1; + + if (!(chip->slot_mask & slot_mask)) + return; + + platform_device_unregister(&chip->slot[nr].pdev); + + /* complementary to spin_unlock() in cb710_set_irq_handler() */ + smp_rmb(); + BUG_ON(chip->slot[nr].irq_handler != NULL); + + /* slot->irq_handler == NULL here, so no lock needed */ + --chip->slots; + chip->slot_mask &= ~slot_mask; +} + +void cb710_set_irq_handler(struct cb710_slot *slot, + cb710_irq_handler_t handler) +{ + struct cb710_chip *chip = cb710_slot_to_chip(slot); + unsigned long flags; + + spin_lock_irqsave(&chip->irq_lock, flags); + slot->irq_handler = handler; + spin_unlock_irqrestore(&chip->irq_lock, flags); +} +EXPORT_SYMBOL_GPL(cb710_set_irq_handler); + +#ifdef CONFIG_PM + +static int cb710_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + + free_irq(pdev->irq, chip); + pci_save_state(pdev); + pci_disable_device(pdev); + if (state.event & PM_EVENT_SLEEP) + pci_set_power_state(pdev, PCI_D3cold); + return 0; +} + +static int cb710_resume(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + int err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + err = pcim_enable_device(pdev); + if (err) + return err; + + return devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); +} + +#endif /* CONFIG_PM */ + +static int __devinit cb710_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct cb710_chip *chip; + unsigned long flags; + u32 val; + int err; + int n = 0; + + err = cb710_pci_configure(pdev); + if (err) + return err; + + /* this is 
actually magic... */ + pci_read_config_dword(pdev, 0x48, &val); + if (!(val & 0x80000000)) { + pci_write_config_dword(pdev, 0x48, val|0x71000000); + pci_read_config_dword(pdev, 0x48, &val); + } + + dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val); + if (!(val & 0x70000000)) + return -ENODEV; + val = (val >> 28) & 7; + if (val & CB710_SLOT_MMC) + ++n; + if (val & CB710_SLOT_MS) + ++n; + if (val & CB710_SLOT_SM) + ++n; + + chip = devm_kzalloc(&pdev->dev, + sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME); + if (err) + return err; + + chip->pdev = pdev; + chip->iobase = pcim_iomap_table(pdev)[0]; + + pci_set_drvdata(pdev, chip); + + err = devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); + if (err) + return err; + + do { + if (!ida_pre_get(&cb710_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock_irqsave(&cb710_ida_lock, flags); + err = ida_get_new(&cb710_ida, &chip->platform_id); + spin_unlock_irqrestore(&cb710_ida_lock, flags); + + if (err && err != -EAGAIN) + return err; + } while (err); + + + dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n", + chip->platform_id, chip->iobase, pdev->irq); + + if (val & CB710_SLOT_MMC) { /* MMC/SD slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MMC, 0x00, "cb710-mmc"); + if (err) + return err; + } + + if (val & CB710_SLOT_MS) { /* MemoryStick slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MS, 0x40, "cb710-ms"); + if (err) + goto unreg_mmc; + } + + if (val & CB710_SLOT_SM) { /* SmartMedia slot */ + err = cb710_register_slot(chip, + CB710_SLOT_SM, 0x60, "cb710-sm"); + if (err) + goto unreg_ms; + } + + return 0; +unreg_ms: + cb710_unregister_slot(chip, CB710_SLOT_MS); +unreg_mmc: + cb710_unregister_slot(chip, CB710_SLOT_MMC); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); 
+#endif + return err; +} +/* + * PCI remove callback: unregister the slots in reverse registration order + * (SM, MS, MMC) and return the chip's platform id to cb710_ida. + */ +static void __devexit cb710_remove_one(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + unsigned long flags; + + cb710_unregister_slot(chip, CB710_SLOT_SM); + cb710_unregister_slot(chip, CB710_SLOT_MS); + cb710_unregister_slot(chip, CB710_SLOT_MMC); +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); +#endif + + spin_lock_irqsave(&cb710_ida_lock, flags); + ida_remove(&cb710_ida, chip->platform_id); + spin_unlock_irqrestore(&cb710_ida_lock, flags); +} +/* PCI IDs this driver binds to. */ +static const struct pci_device_id cb710_pci_tbl[] = { + { PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH, + PCI_ANY_ID, PCI_ANY_ID, }, + { 0, } +}; + +static struct pci_driver cb710_driver = { + .name = KBUILD_MODNAME, + .id_table = cb710_pci_tbl, + .probe = cb710_probe, + .remove = __devexit_p(cb710_remove_one), +#ifdef CONFIG_PM + .suspend = cb710_suspend, + .resume = cb710_resume, +#endif +}; +/* Module init: register the PCI driver. */ +static int __init cb710_init_module(void) +{ + return pci_register_driver(&cb710_driver); +} +/* Module exit: unregister the PCI driver, then destroy the id allocator. */ +static void __exit cb710_cleanup_module(void) +{ + pci_unregister_driver(&cb710_driver); + ida_destroy(&cb710_ida); +} + +module_init(cb710_init_module); +module_exit(cb710_cleanup_module); + +MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>"); +MODULE_DESCRIPTION("ENE CB710 memory card reader driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cb710_pci_tbl); diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c new file mode 100644 index 00000000000..02358d086e0 --- /dev/null +++ b/drivers/misc/cb710/debug.c @@ -0,0 +1,119 @@ +/* + * cb710/debug.c + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/cb710.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> + +#define CB710_REG_COUNT 0x80 +/* + * Read permission map: one u16 per 16-byte register block, bit k set means + * byte k of that block may be read. 0xFFF0 excludes bytes 0-3 of a block. + */ +static const u16 allow[CB710_REG_COUNT/16] = { + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, +}; +static const char *const prefix[ARRAY_SIZE(allow)] = { + "MMC", "MMC", "MMC", "MMC", + "MS?", "MS?", "SM?", "SM?" +}; +/* + * Return non-zero when every byte of the bits-wide register number offset + * in the given block is marked readable in allow[]. + */ +static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits) +{ + unsigned mask = (1 << bits/8) - 1; + offset *= bits/8; + return ((allow[block] >> offset) & mask) == mask; +} +/* + * Generates cb710_read_regs_<t>(): for every block selected by a bit in + * select, read the allowed t-bit registers with ioread<t>() into reg[]. + * Entries for registers that are not allowed are left untouched. + */ +#define CB710_READ_REGS_TEMPLATE(t) \ +static void cb710_read_regs_##t(void __iomem *iobase, \ + u##t *reg, unsigned select) \ +{ \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + if (!allow_reg_read(i, j, t)) \ + continue; \ + reg[j] = ioread##t(iobase \ + + (i << 4) + (j * (t/8))); \ + } \ + } \ +} +/* Per-width hex formats, and the filler printed for unreadable registers. */ +static const char cb710_regf_8[] = "%02X"; +static const char cb710_regf_16[] = "%04X"; +static const char cb710_regf_32[] = "%08X"; +static const char cb710_xes[] = "xxxxxxxx"; +/* + * Generates cb710_dump_regs_<t>(): emit one dev_dbg() line per selected + * block, with each register in hex or as 'x' filler when it is not + * readable, and an extra space in the middle of the line. + */ +#define CB710_DUMP_REGS_TEMPLATE(t) \ +static void cb710_dump_regs_##t(struct device *dev, \ + const u##t *reg, unsigned select) \ +{ \ + const char *const xp = &cb710_xes[8 - t/4]; \ + const char *const format = cb710_regf_##t; \ + \ + char msg[100], *p; \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + p = msg; \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + *p++ = ' '; \ + if (j == 8/(t/8)) \ + *p++ = ' '; \ + if (allow_reg_read(i, j, t)) \ + p += sprintf(p, format, reg[j]); \ + else \ + p += sprintf(p, "%s", xp); \ + } \ + dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg); \ + } \ +} +/* + * Generates cb710_read_and_dump_regs_<t>(): read the selected blocks into a + * zeroed on-stack array, then dump them. + */ +#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t) \ +static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip, \ + 
unsigned select) \ +{ \ + u##t regs[CB710_REG_COUNT/sizeof(u##t)]; \ + \ + memset(&regs, 0, sizeof(regs)); \ + cb710_read_regs_##t(chip->iobase, regs, select); \ + cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select); \ +} +/* Instantiate the read, dump and read-and-dump helpers for one width. */ +#define CB710_REG_ACCESS_TEMPLATES(t) \ + CB710_READ_REGS_TEMPLATE(t) \ + CB710_DUMP_REGS_TEMPLATE(t) \ + CB710_READ_AND_DUMP_REGS_TEMPLATE(t) + +CB710_REG_ACCESS_TEMPLATES(8) +CB710_REG_ACCESS_TEMPLATES(16) +CB710_REG_ACCESS_TEMPLATES(32) +/* + * Dump chip registers through dev_dbg(). select combines block-selection + * bits (CB710_DUMP_REGS_MASK) and access-width bits + * (CB710_DUMP_ACCESS_MASK). With no block bits set all blocks are dumped; + * with no width bits set 8-bit access is used. Each requested width is + * read and dumped separately, 32-bit first. + */ +void cb710_dump_regs(struct cb710_chip *chip, unsigned select) +{ + if (!(select & CB710_DUMP_REGS_MASK)) + select = CB710_DUMP_REGS_ALL; + if (!(select & CB710_DUMP_ACCESS_MASK)) + select |= CB710_DUMP_ACCESS_8; + + if (select & CB710_DUMP_ACCESS_32) + cb710_read_and_dump_regs_32(chip, select); + if (select & CB710_DUMP_ACCESS_16) + cb710_read_and_dump_regs_16(chip, select); + if (select & CB710_DUMP_ACCESS_8) + cb710_read_and_dump_regs_8(chip, select); +} +EXPORT_SYMBOL_GPL(cb710_dump_regs); + diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c new file mode 100644 index 00000000000..d019746551f --- /dev/null +++ b/drivers/misc/cb710/sgbuf2.c @@ -0,0 +1,146 @@ +/* + * cb710/sgbuf2.c + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cb710.h> +/* + * Advance miter to the next mapped chunk and restart the consumed counter, + * which this file uses as the read/write position inside the chunk. + * Returns false when sg_miter_next() has nothing more to map. + */ +static bool sg_dwiter_next(struct sg_mapping_iter *miter) +{ + if (sg_miter_next(miter)) { + miter->consumed = 0; + return true; + } else + return false; +} +/* + * True when the current chunk is fully consumed and no further chunk can be + * mapped. As a side effect this maps the next chunk when there is one. + */ +static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter) +{ + return miter->length == miter->consumed && !sg_dwiter_next(miter); +} +/* + * Slow-path read: assemble a 32-bit word byte-wise, crossing chunk + * boundaries as needed. Bytes past the end of the buffer are zero-filled. + */ +static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter) +{ + size_t len, left = 4; + uint32_t data; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + memcpy(addr, miter->addr + miter->consumed, len); + miter->consumed += len; + left -= len; + if (!left) + return data; + addr += len; + } while (sg_dwiter_next(miter)); + + memset(addr, 0, left); + return data; +} +/* + * Tell whether ptr must be accessed through the byte-wise slow path. + * Architectures that select CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS can + * always use the direct 32-bit access; elsewhere only 4-byte aligned + * pointers can. + */ +static inline bool needs_unaligned_copy(const void *ptr) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + return false; +#else + return ((unsigned long)ptr & 3) != 0; +#endif +} +/* + * Fast-path helper. Returns true with *ptr untouched when the buffer is + * exhausted, true with *ptr pointing at the next word (and the position + * advanced by 4) when at least 4 directly accessible bytes remain in the + * current chunk, and false when the caller must use the slow path. + */ +static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr) +{ + size_t len; + + if (sg_dwiter_is_at_end(miter)) + return true; + + len = miter->length - miter->consumed; + + if (likely(len >= 4 && !needs_unaligned_copy( + miter->addr + miter->consumed))) { + *ptr = miter->addr + miter->consumed; + miter->consumed += 4; + return true; + } + + return false; +} + +/** + * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer + * @miter: sg mapping iterator used for reading + * + * Description: + * Returns 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes past the buffer's end + * are not accessed (read) but are returned as zeroes. @miter@ + * is advanced by 4 bytes or to the end of buffer whichever is + * closer. + * + * Context: + * Same requirements as in sg_miter_next(). + * + * Returns: + * 32-bit word just read. 
+ */ +uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) + return ptr ? *ptr : 0; + + return sg_dwiter_read_buffer(miter); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block); +/* + * Slow-path write: store the 32-bit word byte-wise at the current position, + * crossing chunk boundaries as needed. Bytes that do not fit before the + * end of the buffer are dropped. + */ +static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) +{ + size_t len, left = 4; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + /* write at the current position, as the read path does */ + memcpy(miter->addr + miter->consumed, addr, len); + miter->consumed += len; + left -= len; + if (!left) + return; + addr += len; + } while (sg_dwiter_next(miter)); +} + +/** + * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer + * @miter: sg mapping iterator used for writing + * @data: 32-bit word to write + * + * Description: + * Writes 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes which would be written + * past the buffer's end are silently discarded. @miter@ is + * advanced by 4 bytes or to the end of buffer whichever is closer. + * + * Context: + * Same requirements as in sg_miter_next(). + */ +void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) { + if (ptr) + *ptr = data; + else + return; + } else + sg_dwiter_write_slow(miter, data); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); + diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c new file mode 100644 index 00000000000..8110460558f --- /dev/null +++ b/drivers/misc/cs5535-mfgpt.c @@ -0,0 +1,370 @@ +/* + * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT) + * + * Copyright (C) 2006, Advanced Micro Devices, Inc. 
+ * Copyright (C) 2007 Andres Salomon <dilinger@debian.org> + * Copyright (C) 2009 Andres Salomon <dilinger@collabora.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/cs5535.h> + +#define DRV_NAME "cs5535-mfgpt" +#define MFGPT_BAR 2 + +static int mfgpt_reset_timers; +module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644); +MODULE_PARM_DESC(mfgptfix, "Reset the MFGPT timers during init; " + "required by some broken BIOSes (ie, TinyBIOS < 0.99)."); +/* Handle returned by cs5535_mfgpt_alloc_timer() for one allocated timer. */ +struct cs5535_mfgpt_timer { + struct cs5535_mfgpt_chip *chip; /* chip the timer belongs to */ + int nr; /* timer index; also selects its register bank and MSR bits */ +}; +/* Driver state; a single static instance is used. */ +static struct cs5535_mfgpt_chip { + DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS); /* bit set = timer free to allocate */ + resource_size_t base; /* I/O port base used by cs5535_mfgpt_read/write() */ + + struct pci_dev *pdev; + spinlock_t lock; /* protects avail */ + int initialized; /* allocation is refused while this is 0 */ +} cs5535_mfgpt_chip; +/* + * Set (enable != 0) or clear the MSR bit that routes comparator cmp of + * timer to the given event (MFGPT_EVENT_RESET, _NMI or _IRQ). + * Returns 0, or -EIO for a NULL timer or an unknown event. + */ +int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp, + int event, int enable) +{ + uint32_t msr, mask, value, dummy; + int shift = (cmp == MFGPT_CMP1) ? 0 : 8; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * The register maps for these are described in sections 6.17.1.x of + * the AMD Geode CS5536 Companion Device Data Book. + */ + switch (event) { + case MFGPT_EVENT_RESET: + /* + * XXX: According to the docs, we cannot reset timers above + * 6; that is, resets for 7 and 8 will be ignored. Is this + * a problem? 
-dilinger + */ + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + 24); + break; + + case MFGPT_EVENT_NMI: + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + shift); + break; + + case MFGPT_EVENT_IRQ: + msr = MSR_MFGPT_IRQ; + mask = 1 << (timer->nr + shift); + break; + + default: + return -EIO; + } + + /* read-modify-write of the low MSR word; the high word is written back as read */ + rdmsr(msr, value, dummy); + + if (enable) + value |= mask; + else + value &= ~mask; + + wrmsr(msr, value, dummy); + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event); +/* + * Route comparator cmp of timer to an interrupt line and enable or disable + * the comparator's IRQ event. *irq is in/out: 0 on entry means keep the + * IRQ already programmed or fall back to CONFIG_CS5535_MFGPT_DEFAULT_IRQ; + * on return it holds the IRQ chosen. Returns 0, or -EIO when timer is + * NULL, the line is already set to IRQ 2, the IRQ is unusable or routed to + * LPC, or the event cannot be toggled. + */ +int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq, + int enable) +{ + uint32_t zsel, lpc, dummy; + int shift; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 
0 : 4) + timer->nr % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) + return -EIO; + + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ; + + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + + /* All chosen and checked - go for it */ + if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; + if (enable) { + /* replace this comparator's 4-bit IRQ field in ZSEL */ + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + } + + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq); +/* + * Allocate an MFGPT timer. A negative timer_nr asks for any free timer; + * otherwise that specific timer is requested. With MFGPT_DOMAIN_WORKING + * only timers 0-5 are considered, else all MFGPT_MAX_TIMERS. Returns a + * kmalloc()ed handle to be released with cs5535_mfgpt_free_timer(), or + * NULL if the chip is not initialized, timer_nr is out of range, no timer + * could be taken, or memory allocation fails. + */ +struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain) +{ + struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip; + struct cs5535_mfgpt_timer *timer = NULL; + unsigned long flags; + int max; + + if (!mfgpt->initialized) + goto done; + + /* only allocate timers from the working domain if requested */ + if (domain == MFGPT_DOMAIN_WORKING) + max = 6; + else + max = MFGPT_MAX_TIMERS; + + if (timer_nr >= max) { + /* programmer error. silly programmers! */ + WARN_ON(1); + goto done; + } + + spin_lock_irqsave(&mfgpt->lock, flags); + if (timer_nr < 0) { + unsigned long t; + + /* try to find any available timer */ + t = find_first_bit(mfgpt->avail, max); + /* set timer_nr to -1 if no timers available */ + timer_nr = t < max ? 
(int) t : -1; + } else { + /* + * A set bit in mfgpt->avail marks a free timer (scan_timers() sets + * it, allocation clears it), so a specific timer may only be + * handed out while its bit is still set. + */ + if (!test_bit(timer_nr, mfgpt->avail)) + timer_nr = -1; + } + + if (timer_nr >= 0) + /* if timer_nr is not -1, it's an available timer */ + __clear_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + + if (timer_nr < 0) + goto done; + + timer = kmalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) { + /* aw hell: out of memory, hand the timer back to the pool */ + spin_lock_irqsave(&mfgpt->lock, flags); + __set_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + goto done; + } + timer->chip = mfgpt; + timer->nr = timer_nr; + dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr); + +done: + return timer; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer); + +/* + * XXX: This frees the timer memory, but never resets the actual hardware + * timer. The old geode_mfgpt code did this; it would be good to figure + * out a way to actually release the hardware timer. See comments below. + */ +void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer) +{ + kfree(timer); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer); + +/* + * Read the 16-bit register @reg of @timer. Each timer owns an 8-byte + * register window located at chip->base + nr * 8. + */ +uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg) +{ + return inw(timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_read); + +/* + * Write the 16-bit @value to register @reg of @timer, using the same + * addressing as cs5535_mfgpt_read(). + */ +void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg, + uint16_t value) +{ + outw(value, timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_write); + +/* + * This is a sledgehammer that resets all MFGPT timers. This is required by + * some broken BIOSes which leave the system in an unstable state + * (TinyBIOS 0.98, for example; fixed in 0.99). It's uncertain as to + * whether or not this secret MSR can be used to release individual timers. + * Jordan tells me that he and Mitch once played w/ it, but it's unclear + * what the results of that were (and they experienced some instability). 
+ */ +static void __init reset_all_timers(void) +{ + uint32_t val, dummy; + + /* The following undocumented bit resets the MFGPT timers */ + val = 0xFF; dummy = 0; + wrmsr(MSR_MFGPT_SETUP, val, dummy); +} + +/* + * Check whether any MFGPTs are available for the kernel to use. In most + * cases, firmware that uses AMD's VSA code will claim all timers during + * bootup; we certainly don't want to take them if they're already in use. + * In other cases (such as with VSAless OpenFirmware), the system firmware + * leaves timers available for us to use. + * + * Every timer whose SETUP register has MFGPT_SETUP_SETUP clear gets its bit + * set in mfgpt->avail; the number of such timers is returned. + */ +static int __init scan_timers(struct cs5535_mfgpt_chip *mfgpt) +{ + struct cs5535_mfgpt_timer timer = { .chip = mfgpt }; + unsigned long flags; + int timers = 0; + uint16_t val; + int i; + + /* bios workaround: reset every timer first when mfgpt_reset_timers is set */ + if (mfgpt_reset_timers) + reset_all_timers(); + + /* just to be safe, protect this section w/ lock */ + spin_lock_irqsave(&mfgpt->lock, flags); + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + timer.nr = i; + val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP)) { /* SETUP bit clear: count the timer as free */ + __set_bit(i, mfgpt->avail); + timers++; + } + } + spin_unlock_irqrestore(&mfgpt->lock, flags); + + return timers; +} +/* + * Claim the MFGPT I/O BAR of @pdev, record it in the file-wide + * cs5535_mfgpt_chip and scan for free timers. Returns 0 on success or the + * error reported by pci_enable_device_io() / pci_request_region(). + */ +static int __init cs5535_mfgpt_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + int err, t; + + /* There are two ways to get the MFGPT base address; one is by + * fetching it from MSR_LBAR_MFGPT, the other is by reading the + * PCI BAR info. The latter method is easier (especially across + * different architectures), so we'll stick with that for now. If + * it turns out to be unreliable in the face of crappy BIOSes, we + * can always go back to using MSRs.. 
*/ + + err = pci_enable_device_io(pdev); + if (err) { + dev_err(&pdev->dev, "can't enable device IO\n"); + goto done; + } + + err = pci_request_region(pdev, MFGPT_BAR, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", MFGPT_BAR); + goto done; /* NOTE(review): the device is left enabled on this path */ + } + + /* set up the driver-specific struct (one file-wide instance) */ + cs5535_mfgpt_chip.base = pci_resource_start(pdev, MFGPT_BAR); + cs5535_mfgpt_chip.pdev = pdev; + spin_lock_init(&cs5535_mfgpt_chip.lock); + + dev_info(&pdev->dev, "allocated PCI BAR #%d: base 0x%llx\n", MFGPT_BAR, + (unsigned long long) cs5535_mfgpt_chip.base); + + /* detect the available timers; only afterwards is the chip flagged as initialized */ + t = scan_timers(&cs5535_mfgpt_chip); + dev_info(&pdev->dev, DRV_NAME ": %d MFGPT timers available\n", t); + cs5535_mfgpt_chip.initialized = 1; + return 0; + +done: + return err; +} + +static struct pci_device_id cs5535_mfgpt_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) }, + { 0, }, /* all-zero terminator */ +}; +MODULE_DEVICE_TABLE(pci, cs5535_mfgpt_pci_tbl); + +/* + * Just like with the cs5535-gpio driver, we can't use the standard PCI driver + * registration stuff. It only allows only one driver to bind to each PCI + * device, and we want the GPIO and MFGPT drivers to be able to share a PCI + * device. Instead, we manually scan for the PCI device, request a single + * region, and keep track of the devices that we're using. 
+ */ +/* + * Look up the first PCI device matching an entry of cs5535_mfgpt_pci_tbl and + * probe it. Returns the probe result, or -ENODEV when no entry matches a + * device. pci_dev_put() is called only when the probe fails. + */ +static int __init cs5535_mfgpt_scan_pci(void) +{ + struct pci_dev *pdev; + int err = -ENODEV; + int i; + + for (i = 0; i < ARRAY_SIZE(cs5535_mfgpt_pci_tbl); i++) { /* NOTE(review): this bound also visits the table's last (terminator) slot */ + pdev = pci_get_device(cs5535_mfgpt_pci_tbl[i].vendor, + cs5535_mfgpt_pci_tbl[i].device, NULL); + if (pdev) { + err = cs5535_mfgpt_probe(pdev, + &cs5535_mfgpt_pci_tbl[i]); + if (err) + pci_dev_put(pdev); + + /* we only support a single CS5535/6 southbridge */ + break; + } + } + + return err; +} +/* Module entry point; all the work is done by cs5535_mfgpt_scan_pci(). */ +static int __init cs5535_mfgpt_init(void) +{ + return cs5535_mfgpt_scan_pci(); +} + +module_init(cs5535_mfgpt_init); + +MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>"); +MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c new file mode 100644 index 00000000000..f3ee4a1abb7 --- /dev/null +++ b/drivers/misc/ds1682.c @@ -0,0 +1,267 @@ +/* + * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver + * + * Written by: Grant Likely <grant.likely@secretlab.ca> + * + * Copyright (C) 2007 Secret Lab Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The DS1682 elapsed timer recorder is a simple device that implements + * one elapsed time counter, one event counter, an alarm signal and 10 + * bytes of general purpose EEPROM. + * + * This driver provides access to the DS1682 counters and user data via + * the sysfs. The following attributes are added to the device node: + * elapsed_time (u32): Total elapsed event time in ms resolution + * alarm_time (u32): When elapsed time exceeds the value in alarm_time, + * then the alarm pin is asserted. + * event_count (u16): number of times the event pin has gone low. 
+ * eeprom (u8[10]): general purpose EEPROM + * + * Counter registers and user data are both read/write unless the device + * has been write protected. This driver does not support turning off write + * protection. Once write protection is turned on, it is impossible to + * turn it off again, so I have left the feature out of this driver to avoid + * accidental enabling, but it is trivial to add write protect support. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/string.h> +#include <linux/list.h> +#include <linux/sysfs.h> +#include <linux/ctype.h> +#include <linux/hwmon-sysfs.h> + +/* Device registers */ +#define DS1682_REG_CONFIG 0x00 +#define DS1682_REG_ALARM 0x01 +#define DS1682_REG_ELAPSED 0x05 +#define DS1682_REG_EVT_CNTR 0x09 +#define DS1682_REG_EEPROM 0x0b +#define DS1682_REG_RESET 0x1d +#define DS1682_REG_WRITE_DISABLE 0x1e +#define DS1682_REG_WRITE_MEM_DISABLE 0x1f + +#define DS1682_EEPROM_SIZE 10 + +/* + * Generic counter attributes + * + * ds1682_show() reads sattr->nr bytes starting at register sattr->index, + * treats them as a little-endian value and prints it in decimal; 4-byte + * registers are multiplied by 250 first. Returns the number of characters + * written, or -EIO when the I2C read fails. + */ +static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + __le32 val = 0; + int rc; + + dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name); + + /* Read the register; val was zeroed so a 2-byte read leaves the upper bytes 0 */ + rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *) & val); + if (rc < 0) + return -EIO; + + /* Special case: the 32 bit regs are time values with 1/4s + * resolution, scale them up to milliseconds */ + if (sattr->nr == 4) + return sprintf(buf, "%llu\n", + ((unsigned long long)le32_to_cpu(val)) * 250); + + /* Format the output string and return # of bytes */ + return sprintf(buf, "%li\n", (long)le32_to_cpu(val)); +} +/* + * Parse a number from @buf and write it to the register described by the + * attribute; input for 4-byte registers is divided by 250 first. Returns + * @count on success, -EINVAL when no number could be parsed and -EIO when + * the I2C write fails. + */ +static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = 
to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + char *endp; + u64 val; + __le32 val_le; + int rc; + + dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name); + + /* Decode input; base 0 lets simple_strtoull() pick the radix from the prefix */ + val = simple_strtoull(buf, &endp, 0); + if (buf == endp) { + dev_dbg(dev, "input string not a number\n"); + return -EINVAL; + } + + /* Special case: the 32 bit regs are time values with 1/4s + * resolution, scale input down to quarter-seconds */ + if (sattr->nr == 4) + do_div(val, 250); + + /* write out the value: only the first sattr->nr bytes of the little-endian word are sent */ + val_le = cpu_to_le32(val); + rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *) & val_le); + if (rc < 0) { + dev_err(dev, "register write failed; reg=0x%x, size=%i\n", + sattr->index, sattr->nr); + return -EIO; + } + + return count; +} + +/* + * Simple register attributes + * + * The two trailing macro arguments are what ds1682_show()/ds1682_store() + * use as sattr->nr (register width in bytes) and sattr->index (register + * address). + */ +static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ELAPSED); +static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ALARM); +static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 2, DS1682_REG_EVT_CNTR); + +static const struct attribute_group ds1682_group = { + .attrs = (struct attribute *[]) { + &sensor_dev_attr_elapsed_time.dev_attr.attr, + &sensor_dev_attr_alarm_time.dev_attr.attr, + &sensor_dev_attr_event_count.dev_attr.attr, + NULL, + }, +}; + +/* + * User data attribute + * + * Reads are clamped to the DS1682_EEPROM_SIZE byte window that starts at + * DS1682_REG_EEPROM; a read at or past the end of the window returns 0. + */ +static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + int rc; + + dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + if (off >= DS1682_EEPROM_SIZE) + return 0; + + if (off + count > DS1682_EEPROM_SIZE) + count = DS1682_EEPROM_SIZE - off; + + rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf); + if (rc < 0) + return -EIO; + 
+ return count; +} +/* + * Write user data into the EEPROM window. The length is clamped to the end + * of the DS1682_EEPROM_SIZE byte window; a write starting at or past the end + * returns -ENOSPC and a failed I2C transfer returns -EIO. On success the + * (possibly clamped) byte count is returned. + */ +static ssize_t ds1682_eeprom_write(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + + dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + if (off >= DS1682_EEPROM_SIZE) + return -ENOSPC; + + if (off + count > DS1682_EEPROM_SIZE) + count = DS1682_EEPROM_SIZE - off; + + /* Write out to the device */ + if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf) < 0) + return -EIO; + + return count; +} + +static struct bin_attribute ds1682_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO | S_IWUSR, + }, + .size = DS1682_EEPROM_SIZE, + .read = ds1682_eeprom_read, + .write = ds1682_eeprom_write, +}; + +/* + * Called when a ds1682 device is matched with this driver + * + * Requires I2C_FUNC_SMBUS_I2C_BLOCK from the adapter (-ENODEV otherwise), + * then creates the counter attribute group and the "eeprom" binary file. + * The group is removed again if creating the binary file fails. + */ +static int ds1682_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_I2C_BLOCK)) { + dev_err(&client->dev, "i2c bus does not support the ds1682\n"); + rc = -ENODEV; + goto exit; + } + + rc = sysfs_create_group(&client->dev.kobj, &ds1682_group); + if (rc) + goto exit; + + rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + if (rc) + goto exit_bin_attr; + + return 0; + + exit_bin_attr: + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + exit: + return rc; +} +/* Undo ds1682_probe(): remove the binary file, then the attribute group. */ +static int ds1682_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + return 0; +} + +static const struct i2c_device_id ds1682_id[] = { + { "ds1682", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1682_id); + +static struct i2c_driver ds1682_driver = { + .driver = { + .name = "ds1682", + }, + .probe = ds1682_probe, + .remove = ds1682_remove, + .id_table = ds1682_id, +}; + +static int __init ds1682_init(void) +{ + return 
i2c_add_driver(&ds1682_driver); +} +/* Module unload: unregister the I2C driver registered by ds1682_init(). */ +static void __exit ds1682_exit(void) +{ + i2c_del_driver(&ds1682_driver); +} + +MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>"); +MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver"); +MODULE_LICENSE("GPL"); + +module_init(ds1682_init); +module_exit(ds1682_exit); diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig new file mode 100644 index 00000000000..9118613af32 --- /dev/null +++ b/drivers/misc/eeprom/Kconfig @@ -0,0 +1,73 @@ +menu "EEPROM support" + +config EEPROM_AT24 + tristate "I2C EEPROMs from most vendors" + depends on I2C && SYSFS + help + Enable this driver to get read/write support to most I2C EEPROMs, + after you configure the driver to know about each EEPROM on + your target board. Use these generic chip names, instead of + vendor-specific ones like at24c64 or 24lc02: + + 24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08, + 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024 + + Unless you like data loss puzzles, always be sure that any chip + you configure as a 24c32 (32 kbit) or larger is NOT really a + 24c16 (16 kbit) or smaller, and vice versa. Marking the chip + as read-only won't help recover from this. Also, if your chip + has any software write-protect mechanism you may want to review the + code to make sure this driver won't turn it on by accident. + + If you use this with an SMBus adapter instead of an I2C adapter, + full functionality is not available. Only smaller devices are + supported (24c16 and below, max 4 kByte). + + This driver can also be built as a module. If so, the module + will be called at24. + +config EEPROM_AT25 + tristate "SPI EEPROMs from most vendors" + depends on SPI && SYSFS + help + Enable this driver to get read/write support to most SPI EEPROMs, + after you configure the board init code to know about each eeprom + on your target board. + + This driver can also be built as a module. If so, the module + will be called at25. 
+ +config EEPROM_LEGACY + tristate "Old I2C EEPROM reader" + depends on I2C && SYSFS + help + If you say yes here you get read-only access to the EEPROM data + available on modern memory DIMMs and Sony Vaio laptops via I2C. Such + EEPROMs could theoretically be available on other devices as well. + + This driver can also be built as a module. If so, the module + will be called eeprom. + +config EEPROM_MAX6875 + tristate "Maxim MAX6874/5 power supply supervisor" + depends on I2C && EXPERIMENTAL + help + If you say yes here you get read-only support for the user EEPROM of + the Maxim MAX6874/5 EEPROM-programmable, quad power-supply + sequencer/supervisor. + + All other features of this chip should be accessed via i2c-dev. + + This driver can also be built as a module. If so, the module + will be called max6875. + + +config EEPROM_93CX6 + tristate "EEPROM 93CX6 support" + help + This is a driver for the EEPROM chipsets 93c46 and 93c66. + The driver supports both read as well as write commands. + + If unsure, say N. 
+ +endmenu diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile new file mode 100644 index 00000000000..df3d68ffa9d --- /dev/null +++ b/drivers/misc/eeprom/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_EEPROM_AT24) += at24.o +obj-$(CONFIG_EEPROM_AT25) += at25.o +obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o +obj-$(CONFIG_EEPROM_MAX6875) += max6875.o +obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c new file mode 100644 index 00000000000..2cb2736d65a --- /dev/null +++ b/drivers/misc/eeprom/at24.c @@ -0,0 +1,637 @@ +/* + * at24.c - handle most I2C EEPROMs + * + * Copyright (C) 2005-2007 David Brownell + * Copyright (C) 2008 Wolfram Sang, Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/sysfs.h> +#include <linux/mod_devicetable.h> +#include <linux/log2.h> +#include <linux/bitops.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/i2c/at24.h> + +/* + * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable. + * Differences between different vendor product lines (like Atmel AT24C or + * MicroChip 24LC, etc) won't much matter for typical read/write access. + * There are also I2C RAM chips, likewise interchangeable. One example + * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes). + * + * However, misconfiguration can lose data. "Set 16-bit memory address" + * to a part with 8-bit addressing will overwrite data. Writing with too + * big a page size also loses data. 
And it's not safe to assume that the + * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC + * uses 0x51, for just one example. + * + * Accordingly, explicit board-specific configuration data should be used + * in almost all cases. (One partial exception is an SMBus used to access + * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.) + * + * So this driver uses "new style" I2C driver binding, expecting to be + * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or + * similar kernel-resident tables; or, configuration data coming from + * a bootloader. + * + * Other than binding model, current differences from "eeprom" driver are + * that this one handles write access and isn't restricted to 24c02 devices. + * It also handles larger devices (32 kbit and up) with two-byte addresses, + * which won't work on pure SMBus systems. + */ + +struct at24_data { + struct at24_platform_data chip; + struct memory_accessor macc; + bool use_smbus; + + /* + * Lock protects against activities from other Linux tasks, + * but not from changes by other I2C masters. + */ + struct mutex lock; + struct bin_attribute bin; + + u8 *writebuf; + unsigned write_max; + unsigned num_addresses; + + /* + * Some chips tie up multiple I2C addresses; dummy devices reserve + * them for us, and we'll use them with SMBus calls. + */ + struct i2c_client *client[]; +}; + +/* + * This parameter is to help this driver avoid blocking other drivers out + * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C + * clock, one 256 byte read takes about 1/43 second which is excessive; + * but the 1/170 second it takes at 400 kHz may be quite reasonable; and + * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible. + * + * This value is forced to be a power of two so that writes align on pages. 
+ */ +static unsigned io_limit = 128; +module_param(io_limit, uint, 0); +MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)"); + +/* + * Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. + */ +static unsigned write_timeout = 25; +module_param(write_timeout, uint, 0); +MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)"); + +#define AT24_SIZE_BYTELEN 5 +#define AT24_SIZE_FLAGS 8 + +#define AT24_BITMASK(x) (BIT(x) - 1) + +/* + * create non-zero magic value for given eeprom parameters: the low + * AT24_SIZE_BYTELEN bits hold ilog2 of the byte length, the next + * AT24_SIZE_FLAGS bits hold the flags, and one marker bit above them keeps + * the value non-zero. at24_probe() decodes the same layout. + */ +#define AT24_DEVICE_MAGIC(_len, _flags) \ + ((1 << AT24_SIZE_FLAGS | (_flags)) \ + << AT24_SIZE_BYTELEN | ilog2(_len)) + +static const struct i2c_device_id at24_ids[] = { + /* needs 8 addresses as A0-A2 are ignored */ + { "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) }, + /* old variants can't be handled with this generic entry! */ + { "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) }, + { "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) }, + /* spd is a 24c02 in memory DIMMs */ + { "spd", AT24_DEVICE_MAGIC(2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO) }, + { "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) }, + /* 24rf08 quirk is handled at i2c-core */ + { "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) }, + { "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) }, + { "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) }, + { "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) }, + { "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) }, + { "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) }, + { "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) }, + { "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) }, + /* no magic: at24_probe() fails with -ENODEV for this name unless platform_data is supplied */ + { "at24", 0 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(i2c, at24_ids); + +/*-------------------------------------------------------------------------*/ + +/* + * This routine supports chips which consume multiple I2C addresses. 
It + * computes the addressing information to be used for a given r/w request. + * Assumes that sanity checks for offset happened at sysfs-layer. + */ +static struct i2c_client *at24_translate_offset(struct at24_data *at24, + unsigned *offset) +{ + unsigned i; + + /* the bits above the in-chip address (16 or 8 bits wide) select the client */ + if (at24->chip.flags & AT24_FLAG_ADDR16) { + i = *offset >> 16; + *offset &= 0xffff; + } else { + i = *offset >> 8; + *offset &= 0xff; + } + + return at24->client[i]; +} +/* + * Read up to @count bytes at @offset into @buf with a single transfer. + * The count is clamped to io_limit (and to I2C_SMBUS_BLOCK_MAX on SMBus-only + * adapters). The transfer is retried while the chip does not answer; + * returns the number of bytes read or -ETIMEDOUT. + */ +static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, + unsigned offset, size_t count) +{ + struct i2c_msg msg[2]; + u8 msgbuf[2]; + struct i2c_client *client; + unsigned long timeout, read_time; + int status, i; + + memset(msg, 0, sizeof(msg)); + + /* + * REVISIT some multi-address chips don't rollover page reads to + * the next slave address, so we may need to truncate the count. + * Those chips might need another quirk flag. + * + * If the real hardware used four adjacent 24c02 chips and that + * were misconfigured as one 24c08, that would be a similar effect: + * one "eeprom" file not four, but larger reads would fail when + * they crossed certain pages. + */ + + /* + * Slave address and byte offset derive from the offset. Always + * set the byte address; on a multi-master board, another master + * may have changed the chip's "current" address pointer. + */ + client = at24_translate_offset(at24, &offset); + + if (count > io_limit) + count = io_limit; + + if (at24->use_smbus) { + /* Smaller eeproms can work given some SMBus extension calls */ + if (count > I2C_SMBUS_BLOCK_MAX) + count = I2C_SMBUS_BLOCK_MAX; + } else { + /* + * When we have a better choice than SMBus calls, use a + * combined I2C message. Write address; then read up to + * io_limit data bytes. Note that read page rollover helps us + * here (unlike writes). msgbuf is u8 and will cast to our + * needs. 
+ */ + i = 0; + if (at24->chip.flags & AT24_FLAG_ADDR16) + msgbuf[i++] = offset >> 8; + msgbuf[i++] = offset; + + msg[0].addr = client->addr; + msg[0].buf = msgbuf; + msg[0].len = i; /* one or two address bytes */ + + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].buf = buf; + msg[1].len = count; + } + + /* + * Reads fail if the previous write didn't complete yet. We may + * loop a few times until this one succeeds, waiting at least + * long enough for one entire page write to work. + */ + timeout = jiffies + msecs_to_jiffies(write_timeout); + do { + read_time = jiffies; /* the deadline is checked against the start of the attempt */ + if (at24->use_smbus) { + status = i2c_smbus_read_i2c_block_data(client, offset, + count, buf); + } else { + status = i2c_transfer(client->adapter, msg, 2); + if (status == 2) /* both messages went through */ + status = count; + } + dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n", + count, offset, status, jiffies); + + if (status == count) + return count; + + /* REVISIT: at HZ=100, this is sloooow */ + msleep(1); + } while (time_before(read_time, timeout)); + + return -ETIMEDOUT; +} +/* + * Read @count bytes starting at @off by issuing as many + * at24_eeprom_read() calls as needed. Returns the number of bytes read; an + * error code is returned only when nothing at all could be read. + */ +static ssize_t at24_read(struct at24_data *at24, + char *buf, loff_t off, size_t count) +{ + ssize_t retval = 0; + + if (unlikely(!count)) + return count; + + /* + * Read data from chip, protecting against concurrent updates + * from this host, but not from other I2C masters. 
+ */ + mutex_lock(&at24->lock); + + while (count) { + ssize_t status; + + status = at24_eeprom_read(at24, buf, off, count); + if (status <= 0) { + if (retval == 0) /* report the failure only if no data was transferred yet */ + retval = status; + break; + } + buf += status; + off += status; + count -= status; + retval += status; + } + + mutex_unlock(&at24->lock); + + return retval; +} +/* sysfs read hook: find the at24_data behind @kobj and defer to at24_read(). */ +static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct at24_data *at24; + + at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); + return at24_read(at24, buf, off, count); +} + + +/* + * Note that if the hardware write-protect pin is pulled high, the whole + * chip is normally write protected. But there are plenty of product + * variants here, including OTP fuses and partial chip protect. + * + * We only use page mode writes; the alternative is sloooow. This routine + * writes at most one page. + * + * Returns the number of bytes written (after clamping to write_max and to + * the end of the current page) or -ETIMEDOUT. + */ +static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, + unsigned offset, size_t count) +{ + struct i2c_client *client; + struct i2c_msg msg; + ssize_t status; + unsigned long timeout, write_time; + unsigned next_page; + + /* Get corresponding I2C address and adjust offset */ + client = at24_translate_offset(at24, &offset); + + /* write_max is at most a page */ + if (count > at24->write_max) + count = at24->write_max; + + /* Never roll over backwards, to the start of this page */ + next_page = roundup(offset + 1, at24->chip.page_size); + if (offset + count > next_page) + count = next_page - offset; + + /* If we'll use I2C calls for I/O, set up the message */ + if (!at24->use_smbus) { + int i = 0; + + msg.addr = client->addr; + msg.flags = 0; + + /* msg.buf is u8 and casts will mask the values */ + msg.buf = at24->writebuf; + if (at24->chip.flags & AT24_FLAG_ADDR16) + msg.buf[i++] = offset >> 8; + + msg.buf[i++] = offset; + memcpy(&msg.buf[i], buf, count); /* data follows the one or two address bytes */ + msg.len = i + count; + } + + /* + * Writes fail if the previous one didn't complete 
yet. We may + * loop a few times until this one succeeds, waiting at least + * long enough for one entire page write to work. + */ + timeout = jiffies + msecs_to_jiffies(write_timeout); + do { + write_time = jiffies; /* the deadline is checked against the start of the attempt */ + if (at24->use_smbus) { + status = i2c_smbus_write_i2c_block_data(client, + offset, count, buf); + if (status == 0) /* the SMBus helper reports success as 0 */ + status = count; + } else { + status = i2c_transfer(client->adapter, &msg, 1); + if (status == 1) /* the single message went through */ + status = count; + } + dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n", + count, offset, status, jiffies); + + if (status == count) + return count; + + /* REVISIT: at HZ=100, this is sloooow */ + msleep(1); + } while (time_before(write_time, timeout)); + + return -ETIMEDOUT; +} +/* + * Write @count bytes starting at @off by issuing as many + * at24_eeprom_write() calls as needed. Returns the number of bytes + * written; an error code is returned only when nothing was written. + */ +static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, + size_t count) +{ + ssize_t retval = 0; + + if (unlikely(!count)) + return count; + + /* + * Write data to chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + + while (count) { + ssize_t status; + + status = at24_eeprom_write(at24, buf, off, count); + if (status <= 0) { + if (retval == 0) /* report the failure only if no data was transferred yet */ + retval = status; + break; + } + buf += status; + off += status; + count -= status; + retval += status; + } + + mutex_unlock(&at24->lock); + + return retval; +} +/* sysfs write hook: find the at24_data behind @kobj and defer to at24_write(). */ +static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct at24_data *at24; + + at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); + return at24_write(at24, buf, off, count); +} + +/*-------------------------------------------------------------------------*/ + +/* + * This lets other kernel code access the eeprom data. For example, it + * might hold a board's Ethernet address, or board-specific calibration + * data generated on the manufacturing floor. 
+ */ +/* memory_accessor read hook: recover the at24_data embedding @macc and read. */ +static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, + off_t offset, size_t count) +{ + struct at24_data *at24 = container_of(macc, struct at24_data, macc); + + return at24_read(at24, buf, offset, count); +} +/* memory_accessor write hook: recover the at24_data embedding @macc and write. */ +static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf, + off_t offset, size_t count) +{ + struct at24_data *at24 = container_of(macc, struct at24_data, macc); + + return at24_write(at24, buf, offset, count); +} + +/*-------------------------------------------------------------------------*/ +/* + * Bind to one EEPROM. The chip description is copied from platform_data + * when present; otherwise it is decoded from the id table magic (byte + * length and flags) with a conservative page_size of 1. An id without + * magic and without platform_data is rejected with -ENODEV. + */ +static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct at24_platform_data chip; + bool writable; + bool use_smbus = false; + struct at24_data *at24; + int err; + unsigned i, num_addresses; + kernel_ulong_t magic; + + if (client->dev.platform_data) { + chip = *(struct at24_platform_data *)client->dev.platform_data; + } else { + if (!id->driver_data) { + err = -ENODEV; + goto err_out; + } + /* undo AT24_DEVICE_MAGIC(): log2 of the length in the low bits, flags above */ + magic = id->driver_data; + chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN)); + magic >>= AT24_SIZE_BYTELEN; + chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS); + /* + * This is slow, but we can't know all eeproms, so we better + * play safe. Specifying custom eeprom-types via platform_data + * is recommended anyhow. + */ + chip.page_size = 1; + + chip.setup = NULL; + chip.context = NULL; + } + + if (!is_power_of_2(chip.byte_len)) + dev_warn(&client->dev, + "byte_len looks suspicious (no power of 2)!\n"); + if (!is_power_of_2(chip.page_size)) + dev_warn(&client->dev, + "page_size looks suspicious (no power of 2)!\n"); + + /* Use I2C operations unless we're stuck with SMBus extensions. 
*/ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + /* two-byte addresses cannot be sent through the SMBus block calls */ + if (chip.flags & AT24_FLAG_ADDR16) { + err = -EPFNOSUPPORT; + goto err_out; + } + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + err = -EPFNOSUPPORT; + goto err_out; + } + use_smbus = true; + } + + /* one client per 64 KiB (16-bit addressing) or 256 byte (8-bit) window */ + if (chip.flags & AT24_FLAG_TAKE8ADDR) + num_addresses = 8; + else + num_addresses = DIV_ROUND_UP(chip.byte_len, + (chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256); + + at24 = kzalloc(sizeof(struct at24_data) + + num_addresses * sizeof(struct i2c_client *), GFP_KERNEL); + if (!at24) { + err = -ENOMEM; + goto err_out; + } + + mutex_init(&at24->lock); + at24->use_smbus = use_smbus; + at24->chip = chip; + at24->num_addresses = num_addresses; + + /* + * Export the EEPROM bytes through sysfs, since that's convenient. + * By default, only root should see the data (maybe passwords etc) + */ + at24->bin.attr.name = "eeprom"; + at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR; + at24->bin.read = at24_bin_read; + at24->bin.size = chip.byte_len; + + at24->macc.read = at24_macc_read; + + writable = !(chip.flags & AT24_FLAG_READONLY); + if (writable) { + if (!use_smbus || i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) { + + unsigned write_max = chip.page_size; + + at24->macc.write = at24_macc_write; + + at24->bin.write = at24_bin_write; + at24->bin.attr.mode |= S_IWUSR; + + if (write_max > io_limit) + write_max = io_limit; + if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX) + write_max = I2C_SMBUS_BLOCK_MAX; + at24->write_max = write_max; + + /* buffer (data + address at the beginning) */ + at24->writebuf = kmalloc(write_max + 2, GFP_KERNEL); + if (!at24->writebuf) { + err = -ENOMEM; + goto err_struct; + } + } else { + dev_warn(&client->dev, + "cannot write due to controller restrictions.\n"); + /* no write hooks were installed, so report the device as read-only below */ + writable = false; + } + } + + at24->client[0] = client; + + /* use dummy devices for multiple-address chips */ + for (i = 1; i < num_addresses; i++) { + 
at24->client[i] = i2c_new_dummy(client->adapter, + client->addr + i); + if (!at24->client[i]) { + dev_err(&client->dev, "address 0x%02x unavailable\n", + client->addr + i); + err = -EADDRINUSE; + goto err_clients; + } + } + + err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin); + if (err) + goto err_clients; + + i2c_set_clientdata(client, at24); + + dev_info(&client->dev, "%zu byte %s EEPROM %s\n", + at24->bin.size, client->name, + writable ? "(writable)" : "(read-only)"); + dev_dbg(&client->dev, + "page_size %d, num_addresses %d, write_max %d%s\n", + chip.page_size, num_addresses, + at24->write_max, + use_smbus ? ", use_smbus" : ""); + + /* export data to kernel code */ + if (chip.setup) + chip.setup(&at24->macc, chip.context); + + return 0; + +err_clients: + for (i = 1; i < num_addresses; i++) + if (at24->client[i]) + i2c_unregister_device(at24->client[i]); + + kfree(at24->writebuf); +err_struct: + kfree(at24); +err_out: + dev_dbg(&client->dev, "probe error %d\n", err); + return err; +} + +static int __devexit at24_remove(struct i2c_client *client) +{ + struct at24_data *at24; + int i; + + at24 = i2c_get_clientdata(client); + sysfs_remove_bin_file(&client->dev.kobj, &at24->bin); + + for (i = 1; i < at24->num_addresses; i++) + i2c_unregister_device(at24->client[i]); + + kfree(at24->writebuf); + kfree(at24); + i2c_set_clientdata(client, NULL); + return 0; +} + +/*-------------------------------------------------------------------------*/ + +static struct i2c_driver at24_driver = { + .driver = { + .name = "at24", + .owner = THIS_MODULE, + }, + .probe = at24_probe, + .remove = __devexit_p(at24_remove), + .id_table = at24_ids, +}; + +static int __init at24_init(void) +{ + io_limit = rounddown_pow_of_two(io_limit); + return i2c_add_driver(&at24_driver); +} +module_init(at24_init); + +static void __exit at24_exit(void) +{ + i2c_del_driver(&at24_driver); +} +module_exit(at24_exit); + +MODULE_DESCRIPTION("Driver for most I2C EEPROMs"); +MODULE_AUTHOR("David 
Brownell and Wolfram Sang"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c new file mode 100644 index 00000000000..d902d81dde3 --- /dev/null +++ b/drivers/misc/eeprom/at25.c @@ -0,0 +1,420 @@ +/* + * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models + * + * Copyright (C) 2006 David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/sched.h> + +#include <linux/spi/spi.h> +#include <linux/spi/eeprom.h> + + +/* + * NOTE: this is an *EEPROM* driver. The vagaries of product naming + * mean that some AT25 products are EEPROMs, and others are FLASH. + * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver, + * not this one! 
+ */ + +struct at25_data { + struct spi_device *spi; + struct memory_accessor mem; + struct mutex lock; + struct spi_eeprom chip; + struct bin_attribute bin; + unsigned addrlen; +}; + +#define AT25_WREN 0x06 /* latch the write enable */ +#define AT25_WRDI 0x04 /* reset the write enable */ +#define AT25_RDSR 0x05 /* read status register */ +#define AT25_WRSR 0x01 /* write status register */ +#define AT25_READ 0x03 /* read byte(s) */ +#define AT25_WRITE 0x02 /* write byte(s)/sector */ + +#define AT25_SR_nRDY 0x01 /* nRDY = write-in-progress */ +#define AT25_SR_WEN 0x02 /* write enable (latched) */ +#define AT25_SR_BP0 0x04 /* BP for software writeprotect */ +#define AT25_SR_BP1 0x08 +#define AT25_SR_WPEN 0x80 /* writeprotect enable */ + + +#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ + +/* Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. + */ +#define EE_TIMEOUT 25 + +/*-------------------------------------------------------------------------*/ + +#define io_limit PAGE_SIZE /* bytes */ + +static ssize_t +at25_ee_read( + struct at25_data *at25, + char *buf, + unsigned offset, + size_t count +) +{ + u8 command[EE_MAXADDRLEN + 1]; + u8 *cp; + ssize_t status; + struct spi_transfer t[2]; + struct spi_message m; + + if (unlikely(offset >= at25->bin.size)) + return 0; + if ((offset + count) > at25->bin.size) + count = at25->bin.size - offset; + if (unlikely(!count)) + return count; + + cp = command; + *cp++ = AT25_READ; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + case 2: + *cp++ = offset >> 8; + case 1: + case 0: /* can't happen: for better codegen */ + *cp++ = offset >> 0; + } + + spi_message_init(&m); + memset(t, 0, sizeof t); + + t[0].tx_buf = command; + t[0].len = at25->addrlen + 1; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + spi_message_add_tail(&t[1], &m); + + 
mutex_lock(&at25->lock); + + /* Read it all at once. + * + * REVISIT that's potentially a problem with large chips, if + * other devices on the bus need to be accessed regularly or + * this chip is clocked very slowly + */ + status = spi_sync(at25->spi, &m); + dev_dbg(&at25->spi->dev, + "read %Zd bytes at %d --> %d\n", + count, offset, (int) status); + + mutex_unlock(&at25->lock); + return status ? status : count; +} + +static ssize_t +at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct device *dev; + struct at25_data *at25; + + dev = container_of(kobj, struct device, kobj); + at25 = dev_get_drvdata(dev); + + return at25_ee_read(at25, buf, off, count); +} + + +static ssize_t +at25_ee_write(struct at25_data *at25, const char *buf, loff_t off, + size_t count) +{ + ssize_t status = 0; + unsigned written = 0; + unsigned buf_size; + u8 *bounce; + + if (unlikely(off >= at25->bin.size)) + return -EFBIG; + if ((off + count) > at25->bin.size) + count = at25->bin.size - off; + if (unlikely(!count)) + return count; + + /* Temp buffer starts with command and address */ + buf_size = at25->chip.page_size; + if (buf_size > io_limit) + buf_size = io_limit; + bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL); + if (!bounce) + return -ENOMEM; + + /* For write, rollover is within the page ... so we write at + * most one page, then manually roll over to the next page. 
+ */ + bounce[0] = AT25_WRITE; + mutex_lock(&at25->lock); + do { + unsigned long timeout, retries; + unsigned segment; + unsigned offset = (unsigned) off; + u8 *cp = bounce + 1; + int sr; + + *cp = AT25_WREN; + status = spi_write(at25->spi, cp, 1); + if (status < 0) { + dev_dbg(&at25->spi->dev, "WREN --> %d\n", + (int) status); + break; + } + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + case 2: + *cp++ = offset >> 8; + case 1: + case 0: /* can't happen: for better codegen */ + *cp++ = offset >> 0; + } + + /* Write as much of a page as we can */ + segment = buf_size - (offset % buf_size); + if (segment > count) + segment = count; + memcpy(cp, buf, segment); + status = spi_write(at25->spi, bounce, + segment + at25->addrlen + 1); + dev_dbg(&at25->spi->dev, + "write %u bytes at %u --> %d\n", + segment, offset, (int) status); + if (status < 0) + break; + + /* REVISIT this should detect (or prevent) failed writes + * to readonly sections of the EEPROM... + */ + + /* Wait for non-busy status */ + timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT); + retries = 0; + do { + + sr = spi_w8r8(at25->spi, AT25_RDSR); + if (sr < 0 || (sr & AT25_SR_nRDY)) { + dev_dbg(&at25->spi->dev, + "rdsr --> %d (%02x)\n", sr, sr); + /* at HZ=100, this is sloooow */ + msleep(1); + continue; + } + if (!(sr & AT25_SR_nRDY)) + break; + } while (retries++ < 3 || time_before_eq(jiffies, timeout)); + + if ((sr < 0) || (sr & AT25_SR_nRDY)) { + dev_err(&at25->spi->dev, + "write %d bytes offset %d, " + "timeout after %u msecs\n", + segment, offset, + jiffies_to_msecs(jiffies - + (timeout - EE_TIMEOUT))); + status = -ETIMEDOUT; + break; + } + + off += segment; + buf += segment; + count -= segment; + written += segment; + + } while (count > 0); + + mutex_unlock(&at25->lock); + + kfree(bounce); + return written ? 
written : status; +} + +static ssize_t +at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct device *dev; + struct at25_data *at25; + + dev = container_of(kobj, struct device, kobj); + at25 = dev_get_drvdata(dev); + + return at25_ee_write(at25, buf, off, count); +} + +/*-------------------------------------------------------------------------*/ + +/* Let in-kernel code access the eeprom data. */ + +static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, + off_t offset, size_t count) +{ + struct at25_data *at25 = container_of(mem, struct at25_data, mem); + + return at25_ee_read(at25, buf, offset, count); +} + +static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf, + off_t offset, size_t count) +{ + struct at25_data *at25 = container_of(mem, struct at25_data, mem); + + return at25_ee_write(at25, buf, offset, count); +} + +/*-------------------------------------------------------------------------*/ + +static int at25_probe(struct spi_device *spi) +{ + struct at25_data *at25 = NULL; + const struct spi_eeprom *chip; + int err; + int sr; + int addrlen; + + /* Chip description */ + chip = spi->dev.platform_data; + if (!chip) { + dev_dbg(&spi->dev, "no chip description\n"); + err = -ENODEV; + goto fail; + } + + /* For now we only support 8/16/24 bit addressing */ + if (chip->flags & EE_ADDR1) + addrlen = 1; + else if (chip->flags & EE_ADDR2) + addrlen = 2; + else if (chip->flags & EE_ADDR3) + addrlen = 3; + else { + dev_dbg(&spi->dev, "unsupported address type\n"); + err = -EINVAL; + goto fail; + } + + /* Ping the chip ... the status register is pretty portable, + * unlike probing manufacturer IDs. We do expect that system + * firmware didn't write it in the past few milliseconds! 
+ */ + sr = spi_w8r8(spi, AT25_RDSR); + if (sr < 0 || sr & AT25_SR_nRDY) { + dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr); + err = -ENXIO; + goto fail; + } + + if (!(at25 = kzalloc(sizeof *at25, GFP_KERNEL))) { + err = -ENOMEM; + goto fail; + } + + mutex_init(&at25->lock); + at25->chip = *chip; + at25->spi = spi_dev_get(spi); + dev_set_drvdata(&spi->dev, at25); + at25->addrlen = addrlen; + + /* Export the EEPROM bytes through sysfs, since that's convenient. + * And maybe to other kernel code; it might hold a board's Ethernet + * address, or board-specific calibration data generated on the + * manufacturing floor. + * + * Default to root-only access to the data; EEPROMs often hold data + * that's sensitive for read and/or write, like ethernet addresses, + * security codes, board-specific manufacturing calibrations, etc. + */ + at25->bin.attr.name = "eeprom"; + at25->bin.attr.mode = S_IRUSR; + at25->bin.read = at25_bin_read; + at25->mem.read = at25_mem_read; + + at25->bin.size = at25->chip.byte_len; + if (!(chip->flags & EE_READONLY)) { + at25->bin.write = at25_bin_write; + at25->bin.attr.mode |= S_IWUSR; + at25->mem.write = at25_mem_write; + } + + err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin); + if (err) + goto fail; + + if (chip->setup) + chip->setup(&at25->mem, chip->context); + + dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n", + (at25->bin.size < 1024) + ? at25->bin.size + : (at25->bin.size / 1024), + (at25->bin.size < 1024) ? "Byte" : "KByte", + at25->chip.name, + (chip->flags & EE_READONLY) ? 
" (readonly)" : "", + at25->chip.page_size); + return 0; +fail: + dev_dbg(&spi->dev, "probe err %d\n", err); + kfree(at25); + return err; +} + +static int __devexit at25_remove(struct spi_device *spi) +{ + struct at25_data *at25; + + at25 = dev_get_drvdata(&spi->dev); + sysfs_remove_bin_file(&spi->dev.kobj, &at25->bin); + kfree(at25); + return 0; +} + +/*-------------------------------------------------------------------------*/ + +static struct spi_driver at25_driver = { + .driver = { + .name = "at25", + .owner = THIS_MODULE, + }, + .probe = at25_probe, + .remove = __devexit_p(at25_remove), +}; + +static int __init at25_init(void) +{ + return spi_register_driver(&at25_driver); +} +module_init(at25_init); + +static void __exit at25_exit(void) +{ + spi_unregister_driver(&at25_driver); +} +module_exit(at25_exit); + +MODULE_DESCRIPTION("Driver for most SPI EEPROMs"); +MODULE_AUTHOR("David Brownell"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:at25"); diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c new file mode 100644 index 00000000000..f939ebc2507 --- /dev/null +++ b/drivers/misc/eeprom/eeprom.c @@ -0,0 +1,253 @@ +/* + Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and + Philip Edelbrock <phil@netroedge.com> + Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com> + Copyright (C) 2003 IBM Corp. + Copyright (C) 2004 Jean Delvare <khali@linux-fr.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/mutex.h> + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, I2C_CLIENT_END }; + + +/* Size of EEPROM in bytes */ +#define EEPROM_SIZE 256 + +/* possible types of eeprom devices */ +enum eeprom_nature { + UNKNOWN, + VAIO, +}; + +/* Each client has this additional data */ +struct eeprom_data { + struct mutex update_lock; + u8 valid; /* bitfield, bit!=0 if slice is valid */ + unsigned long last_updated[8]; /* In jiffies, 8 slices */ + u8 data[EEPROM_SIZE]; /* Register values */ + enum eeprom_nature nature; +}; + + +static void eeprom_update_client(struct i2c_client *client, u8 slice) +{ + struct eeprom_data *data = i2c_get_clientdata(client); + int i; + + mutex_lock(&data->update_lock); + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice] + 300 * HZ)) { + dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice); + + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + for (i = slice << 5; i < (slice + 1) << 5; i += 32) + if (i2c_smbus_read_i2c_block_data(client, i, + 32, data->data + i) + != 32) + goto exit; + } else { + for (i = slice << 5; i < (slice + 1) << 5; i += 2) { + int word = i2c_smbus_read_word_data(client, i); + if (word < 0) + goto exit; + data->data[i] = word & 0xff; + data->data[i + 1] = word >> 8; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit: + mutex_unlock(&data->update_lock); +} + +static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) 
+{ + struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj)); + struct eeprom_data *data = i2c_get_clientdata(client); + u8 slice; + + if (off > EEPROM_SIZE) + return 0; + if (off + count > EEPROM_SIZE) + count = EEPROM_SIZE - off; + + /* Only refresh slices which contain requested bytes */ + for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) + eeprom_update_client(client, slice); + + /* Hide Vaio private settings to regular users: + - BIOS passwords: bytes 0x00 to 0x0f + - UUID: bytes 0x10 to 0x1f + - Serial number: 0xc0 to 0xdf */ + if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) { + int i; + + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + else + buf[i] = data->data[off + i]; + } + } else { + memcpy(buf, &data->data[off], count); + } + + return count; +} + +static struct bin_attribute eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = EEPROM_SIZE, + .read = eeprom_read, +}; + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + + /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all + addresses 0x50-0x57, but we only care about 0x50. So decline + attaching to addresses >= 0x51 on DDC buses */ + if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51) + return -ENODEV; + + /* There are four ways we can read the EEPROM data: + (1) I2C block reads (faster, but unsupported by most adapters) + (2) Word reads (128% overhead) + (3) Consecutive byte reads (88% overhead, unsafe) + (4) Regular byte data reads (265% overhead) + The third and fourth methods are not implemented by this driver + because all known adapters support one of the first two. 
*/ + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA) + && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) + return -ENODEV; + + strlcpy(info->type, "eeprom", I2C_NAME_SIZE); + + return 0; +} + +static int eeprom_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct eeprom_data *data; + int err; + + if (!(data = kzalloc(sizeof(struct eeprom_data), GFP_KERNEL))) { + err = -ENOMEM; + goto exit; + } + + memset(data->data, 0xff, EEPROM_SIZE); + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + data->nature = UNKNOWN; + + /* Detect the Vaio nature of EEPROMs. + We use the "PCG-" or "VGN-" prefix as the signature. */ + if (client->addr == 0x57 + && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + char name[4]; + + name[0] = i2c_smbus_read_byte_data(client, 0x80); + name[1] = i2c_smbus_read_byte_data(client, 0x81); + name[2] = i2c_smbus_read_byte_data(client, 0x82); + name[3] = i2c_smbus_read_byte_data(client, 0x83); + + if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { + dev_info(&client->dev, "Vaio EEPROM detected, " + "enabling privacy protection\n"); + data->nature = VAIO; + } + } + + /* create the sysfs eeprom file */ + err = sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); + if (err) + goto exit_kfree; + + return 0; + +exit_kfree: + kfree(data); +exit: + return err; +} + +static int eeprom_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr); + kfree(i2c_get_clientdata(client)); + + return 0; +} + +static const struct i2c_device_id eeprom_id[] = { + { "eeprom", 0 }, + { } +}; + +static struct i2c_driver eeprom_driver = { + .driver = { + .name = "eeprom", + }, + .probe = eeprom_probe, + .remove = eeprom_remove, + .id_table = eeprom_id, + + .class = I2C_CLASS_DDC | I2C_CLASS_SPD, + .detect = eeprom_detect, + .address_list = normal_i2c, +}; + +static int __init 
eeprom_init(void) +{ + return i2c_add_driver(&eeprom_driver); +} + +static void __exit eeprom_exit(void) +{ + i2c_del_driver(&eeprom_driver); +} + + +MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl> and " + "Philip Edelbrock <phil@netroedge.com> and " + "Greg Kroah-Hartman <greg@kroah.com>"); +MODULE_DESCRIPTION("I2C EEPROM driver"); +MODULE_LICENSE("GPL"); + +module_init(eeprom_init); +module_exit(eeprom_exit); diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c new file mode 100644 index 00000000000..15b1780025c --- /dev/null +++ b/drivers/misc/eeprom/eeprom_93cx6.c @@ -0,0 +1,240 @@ +/* + Copyright (C) 2004 - 2006 rt2x00 SourceForge Project + <http://rt2x00.serialmonkey.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the + Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + Module: eeprom_93cx6 + Abstract: EEPROM reader routines for 93cx6 chipsets. + Supported chipsets: 93c46 & 93c66. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/eeprom_93cx6.h> + +MODULE_AUTHOR("http://rt2x00.serialmonkey.com"); +MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("EEPROM 93cx6 chip driver"); +MODULE_LICENSE("GPL"); + +static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 1; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 0; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear all flags, and enable chip select. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->reg_data_clock = 0; + eeprom->reg_chip_select = 1; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear chip_select and data_in flags. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_chip_select = 0; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom, + const u16 data, const u16 count) +{ + unsigned int i; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + + /* + * Start writing all bits. + */ + for (i = count; i > 0; i--) { + /* + * Check if this bit needs to be set. 
+ */ + eeprom->reg_data_in = !!(data & (1 << (i - 1))); + + /* + * Write the bit to the eeprom register. + */ + eeprom->register_write(eeprom); + + /* + * Kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); + } + + eeprom->reg_data_in = 0; + eeprom->register_write(eeprom); +} + +static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom, + u16 *data, const u16 count) +{ + unsigned int i; + u16 buf = 0; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + + /* + * Start reading all bits. + */ + for (i = count; i > 0; i--) { + eeprom_93cx6_pulse_high(eeprom); + + eeprom->register_read(eeprom); + + /* + * Clear data_in flag. + */ + eeprom->reg_data_in = 0; + + /* + * Read if the bit has been set. + */ + if (eeprom->reg_data_out) + buf |= (1 << (i - 1)); + + eeprom_93cx6_pulse_low(eeprom); + } + + *data = buf; +} + +/** + * eeprom_93cx6_read - Read multiple words from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read the eeprom data as host-endian word + * into the given data pointer. + */ +void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word, + u16 *data) +{ + u16 command; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the word to be read. + */ + command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* + * Read the requested 16 bits. + */ + eeprom_93cx6_read_bits(eeprom, data, 16); + + /* + * Cleanup eeprom register. 
+ */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_read); + +/** + * eeprom_93cx6_multiread - Read multiple words from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * @words: Number of words that should be read. + * + * This function will read all requested words from the eeprom, + * this is done by calling eeprom_93cx6_read() multiple times. + * But with the additional change that while the eeprom_93cx6_read + * will return host ordered bytes, this method will return little + * endian words. + */ +void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word, + __le16 *data, const u16 words) +{ + unsigned int i; + u16 tmp; + + for (i = 0; i < words; i++) { + tmp = 0; + eeprom_93cx6_read(eeprom, word + i, &tmp); + data[i] = cpu_to_le16(tmp); + } +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread); + diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c new file mode 100644 index 00000000000..5a6b2bce8ad --- /dev/null +++ b/drivers/misc/eeprom/max6875.c @@ -0,0 +1,227 @@ +/* + max6875.c - driver for MAX6874/MAX6875 + + Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com> + + Based on eeprom.c + + The MAX6875 has a bank of registers and two banks of EEPROM. + Address ranges are defined as follows: + * 0x0000 - 0x0046 = configuration registers + * 0x8000 - 0x8046 = configuration EEPROM + * 0x8100 - 0x82FF = user EEPROM + + This driver makes the user EEPROM available for read. + + The registers & config EEPROM should be accessed via i2c-dev. + + The MAX6875 ignores the lowest address bit, so each chip responds to + two addresses - 0x50/0x51 and 0x52/0x53. + + Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read + address, so this driver is destructive if loaded for the wrong EEPROM chip. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/mutex.h> + +/* The MAX6875 can only read/write 16 bytes at a time */ +#define SLICE_SIZE 16 +#define SLICE_BITS 4 + +/* USER EEPROM is at addresses 0x8100 - 0x82FF */ +#define USER_EEPROM_BASE 0x8100 +#define USER_EEPROM_SIZE 0x0200 +#define USER_EEPROM_SLICES 32 + +/* MAX6875 commands */ +#define MAX6875_CMD_BLK_READ 0x84 + +/* Each client has this additional data */ +struct max6875_data { + struct i2c_client *fake_client; + struct mutex update_lock; + + u32 valid; + u8 data[USER_EEPROM_SIZE]; + unsigned long last_updated[USER_EEPROM_SLICES]; +}; + +static void max6875_update_slice(struct i2c_client *client, int slice) +{ + struct max6875_data *data = i2c_get_clientdata(client); + int i, j, addr; + u8 *buf; + + if (slice >= USER_EEPROM_SLICES) + return; + + mutex_lock(&data->update_lock); + + buf = &data->data[slice << SLICE_BITS]; + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice])) { + + dev_dbg(&client->dev, "Starting update of slice %u\n", slice); + + data->valid &= ~(1 << slice); + + addr = USER_EEPROM_BASE + (slice << SLICE_BITS); + + /* select the eeprom address */ + if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) { + dev_err(&client->dev, "address set failed\n"); + goto exit_up; + } + + if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + if (i2c_smbus_read_i2c_block_data(client, + MAX6875_CMD_BLK_READ, + SLICE_SIZE, + buf) != SLICE_SIZE) { + goto exit_up; + } + } else { + for (i = 0; i < SLICE_SIZE; i++) { + j = i2c_smbus_read_byte(client); + if (j < 0) { + goto exit_up; + } + buf[i] = j; + } + } + data->last_updated[slice] = 
jiffies; + data->valid |= (1 << slice); + } +exit_up: + mutex_unlock(&data->update_lock); +} + +static ssize_t max6875_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + struct max6875_data *data = i2c_get_clientdata(client); + int slice, max_slice; + + if (off > USER_EEPROM_SIZE) + return 0; + + if (off + count > USER_EEPROM_SIZE) + count = USER_EEPROM_SIZE - off; + + /* refresh slices which contain requested bytes */ + max_slice = (off + count - 1) >> SLICE_BITS; + for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++) + max6875_update_slice(client, slice); + + memcpy(buf, &data->data[off], count); + + return count; +} + +static struct bin_attribute user_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = USER_EEPROM_SIZE, + .read = max6875_read, +}; + +static int max6875_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct max6875_data *data; + int err; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA + | I2C_FUNC_SMBUS_READ_BYTE)) + return -ENODEV; + + /* Only bind to even addresses */ + if (client->addr & 1) + return -ENODEV; + + if (!(data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL))) + return -ENOMEM; + + /* A fake client is created on the odd address */ + data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1); + if (!data->fake_client) { + err = -ENOMEM; + goto exit_kfree; + } + + /* Init real i2c_client */ + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr); + if (err) + goto exit_remove_fake; + + return 0; + +exit_remove_fake: + i2c_unregister_device(data->fake_client); +exit_kfree: + kfree(data); + return err; +} + +static int max6875_remove(struct i2c_client *client) +{ + struct max6875_data *data = 
i2c_get_clientdata(client); + + i2c_unregister_device(data->fake_client); + + sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr); + kfree(data); + + return 0; +} + +static const struct i2c_device_id max6875_id[] = { + { "max6875", 0 }, + { } +}; + +static struct i2c_driver max6875_driver = { + .driver = { + .name = "max6875", + }, + .probe = max6875_probe, + .remove = max6875_remove, + .id_table = max6875_id, +}; + +static int __init max6875_init(void) +{ + return i2c_add_driver(&max6875_driver); +} + +static void __exit max6875_exit(void) +{ + i2c_del_driver(&max6875_driver); +} + + +MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>"); +MODULE_DESCRIPTION("MAX6875 driver"); +MODULE_LICENSE("GPL"); + +module_init(max6875_init); +module_exit(max6875_exit); diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c new file mode 100644 index 00000000000..1eac626e710 --- /dev/null +++ b/drivers/misc/enclosure.c @@ -0,0 +1,566 @@ +/* + * Enclosure Services + * + * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com> + * +**----------------------------------------------------------------------------- +** +** This program is free software; you can redistribute it and/or +** modify it under the terms of the GNU General Public License +** version 2 as published by the Free Software Foundation. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+**
+**-----------------------------------------------------------------------------
+*/
+#include <linux/device.h>
+#include <linux/enclosure.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+/* every registered enclosure, linked through enclosure_device.node */
+static LIST_HEAD(container_list);
+/* serialises all walks and updates of container_list */
+static DEFINE_MUTEX(container_list_lock);
+/* forward declaration; the class is defined with the sysfs pieces below */
+static struct class enclosure_class;
+
+/**
+ * enclosure_find - find an enclosure given a parent device
+ * @dev:	the parent to match against
+ * @start:	Optional enclosure device to start from (NULL if none)
+ *
+ * Looks through the list of registered enclosures to find all those
+ * with @dev as a parent.  Returns NULL if no enclosure is
+ * found. @start can be used as a starting point to obtain multiple
+ * enclosures per parent (should begin with NULL and then be set to
+ * each returned enclosure device). Obtains a reference to the
+ * enclosure class device which must be released with put_device().
+ * If @start is not NULL, a reference must be taken on it which is
+ * released before returning (this allows a loop through all
+ * enclosures to exit with only the reference on the enclosure of
+ * interest held).  Note that the @dev may correspond to the actual
+ * device housing the enclosure, in which case no iteration via @start
+ * is required.
+ */
+struct enclosure_device *enclosure_find(struct device *dev,
+					struct enclosure_device *start)
+{
+	struct enclosure_device *pos;
+	struct enclosure_device *match = NULL;
+	struct device *ancestor;
+
+	mutex_lock(&container_list_lock);
+	/* resume right after @start, or from the list head when it is NULL */
+	pos = list_prepare_entry(start, &container_list, node);
+	if (start)
+		put_device(&start->edev);
+
+	list_for_each_entry_continue(pos, &container_list, node) {
+		/* @dev need not be the direct parent: climb towards the root */
+		for (ancestor = pos->edev.parent; ancestor;
+		     ancestor = ancestor->parent) {
+			if (ancestor != dev)
+				continue;
+			/* pin the match before the list lock is dropped */
+			get_device(&pos->edev);
+			match = pos;
+			goto unlock;
+		}
+	}
+unlock:
+	mutex_unlock(&container_list_lock);
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(enclosure_find);
+
+/**
+ * enclosure_for_each_device - calls a function for each enclosure
+ * @fn:		the function to call
+ * @data:	the data to pass to each call
+ *
+ * Walks the list of registered enclosures and hands each one to @fn
+ * together with @data.  The walk stops at the first non-zero return
+ * value of @fn, which is then returned to the caller; zero is returned
+ * when every call returned zero (or the list is empty).
+ *
+ * Note, the list mutex is held across the calls to @fn, so the caller
+ * must be in non atomic context, and @fn may (although it should not)
+ * sleep or otherwise cause the mutex to be held for indefinite periods
+ */
+int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *),
+			      void *data)
+{
+	struct enclosure_device *pos;
+	int ret = 0;
+
+	mutex_lock(&container_list_lock);
+	list_for_each_entry(pos, &container_list, node) {
+		ret = fn(pos, data);
+		if (ret != 0)
+			break;
+	}
+	mutex_unlock(&container_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(enclosure_for_each_device);
+
+/**
+ * enclosure_register - register device as an enclosure
+ *
+ * @dev:	device containing the enclosure
+ * @components:	number of components in the enclosure
+ *
+ * This sets up the device for being an enclosure.  Note that @dev does
+ * not have to be a dedicated enclosure device.
It may be some other type
+ * of device that additionally responds to enclosure services
+ *
+ * Returns the new enclosure device, or an ERR_PTR() value on failure.
+ */
+struct enclosure_device *
+enclosure_register(struct device *dev, const char *name, int components,
+		   struct enclosure_component_callbacks *cb)
+{
+	struct enclosure_device *edev;
+	int err, i;
+
+	BUG_ON(!cb);
+
+	/* a negative count would wrap the allocation size computed below */
+	if (components < 0)
+		return ERR_PTR(-EINVAL);
+
+	edev = kzalloc(sizeof(struct enclosure_device) +
+		       sizeof(struct enclosure_component)*components,
+		       GFP_KERNEL);
+	if (!edev)
+		return ERR_PTR(-ENOMEM);
+
+	edev->components = components;
+
+	edev->edev.class = &enclosure_class;
+	edev->edev.parent = get_device(dev);
+	edev->cb = cb;
+	dev_set_name(&edev->edev, "%s", name);
+	err = device_register(&edev->edev);
+	if (err)
+		goto err;
+
+	/* -1 marks a component slot that has not been registered yet */
+	for (i = 0; i < components; i++)
+		edev->component[i].number = -1;
+
+	mutex_lock(&container_list_lock);
+	list_add_tail(&edev->node, &container_list);
+	mutex_unlock(&container_list_lock);
+
+	return edev;
+
+ err:
+	/*
+	 * The embedded device must not be freed directly once
+	 * device_register() has been called, even when it failed.  Dropping
+	 * the reference ends in enclosure_release(), which puts the parent
+	 * and frees @edev.
+	 */
+	put_device(&edev->edev);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(enclosure_register);
+
+/* all-NULL callback table installed while an enclosure is being removed */
+static struct enclosure_component_callbacks enclosure_null_callbacks;
+
+/**
+ * enclosure_unregister - remove an enclosure
+ *
+ * @edev:	the registered enclosure to remove;
+ *
+ * Takes the enclosure off the global list, unregisters every component
+ * that was registered and finally unregisters the enclosure device itself.
+ */
+void enclosure_unregister(struct enclosure_device *edev)
+{
+	int i;
+
+	mutex_lock(&container_list_lock);
+	list_del(&edev->node);
+	mutex_unlock(&container_list_lock);
+
+	for (i = 0; i < edev->components; i++)
+		if (edev->component[i].number != -1)
+			device_unregister(&edev->component[i].cdev);
+
+	/* prevent any callbacks into service user */
+	edev->cb = &enclosure_null_callbacks;
+	device_unregister(&edev->edev);
+}
+EXPORT_SYMBOL_GPL(enclosure_unregister);
+
+#define ENCLOSURE_NAME_SIZE	64
+
+/*
+ * Build the name of the link placed in the housed device's directory.
+ * @name must point to at least ENCLOSURE_NAME_SIZE bytes; a component
+ * name that does not fit is truncated instead of overrunning the buffer.
+ */
+static void enclosure_link_name(struct enclosure_component *cdev, char *name)
+{
+	snprintf(name, ENCLOSURE_NAME_SIZE, "enclosure_device:%s",
+		 dev_name(&cdev->cdev));
+}
+
+/* remove both sysfs links created by enclosure_add_links() */
+static void enclosure_remove_links(struct enclosure_component *cdev)
+{
+	char name[ENCLOSURE_NAME_SIZE];
+
+	enclosure_link_name(cdev, name);
+	sysfs_remove_link(&cdev->dev->kobj, name);
+	sysfs_remove_link(&cdev->cdev.kobj, "device");
+}
+
+/*
+ * Cross-link a component and the device sitting in it: "device" in the
+ * component directory and "enclosure_device:<component>" in the device
+ * directory.  Returns 0 or the sysfs error; no link is left behind on
+ * failure.
+ */
+static int enclosure_add_links(struct enclosure_component *cdev)
+{
+	int error;
+	char name[ENCLOSURE_NAME_SIZE];
+
+	error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device");
+	if (error)
+		return error;
+
+	enclosure_link_name(cdev, name);
+	error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name);
+	if (error)
+		sysfs_remove_link(&cdev->cdev.kobj, "device");
+
+	return error;
+}
+
+/* class release callback: drop the parent reference and free the enclosure */
+static void enclosure_release(struct device *cdev)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	put_device(cdev->parent);
+	kfree(edev);
+}
+
+/*
+ * Component release callback: detach the housed device, if any, and drop
+ * the reference on the enclosure.  Nothing is freed here.
+ */
+static void enclosure_component_release(struct device *dev)
+{
+	struct enclosure_component *cdev = to_enclosure_component(dev);
+
+	if (cdev->dev) {
+		enclosure_remove_links(cdev);
+		put_device(cdev->dev);
+	}
+	put_device(dev->parent);
+}
+
+static const struct attribute_group *enclosure_groups[];
+
+/**
+ * enclosure_component_register - add a particular component to an enclosure
+ * @edev:	the enclosure to add the component
+ * @number:	the device number
+ * @type:	the type of component being added
+ * @name:	an optional name to appear in sysfs (leave NULL if none)
+ *
+ * Registers the component.  The name is optional for enclosures that
+ * give their components a unique name.  If not, leave the field NULL
+ * and a name will be assigned.
+ *
+ * Returns a pointer to the enclosure component or an error.
+ */
+struct enclosure_component *
+enclosure_component_register(struct enclosure_device *edev,
+			     unsigned int number,
+			     enum enclosure_component_type type,
+			     const char *name)
+{
+	struct enclosure_component *ecomp;
+	struct device *cdev;
+	int err;
+
+	if (number >= edev->components)
+		return ERR_PTR(-EINVAL);
+
+	ecomp = &edev->component[number];
+
+	/* anything other than -1 means the slot is already registered */
+	if (ecomp->number != -1)
+		return ERR_PTR(-EINVAL);
+
+	ecomp->type = type;
+	ecomp->number = number;
+	cdev = &ecomp->cdev;
+	cdev->parent = get_device(&edev->edev);
+	if (name && name[0])
+		dev_set_name(cdev, "%s", name);
+	else
+		dev_set_name(cdev, "%u", number);
+
+	cdev->release = enclosure_component_release;
+	cdev->groups = enclosure_groups;
+
+	err = device_register(cdev);
+	if (err) {
+		/*
+		 * Report the failure instead of handing back a component
+		 * that was never registered.  The slot is marked free again
+		 * so enclosure_unregister() skips it, and the reference set
+		 * up by device_register() is dropped, which also releases
+		 * the enclosure reference taken above.
+		 */
+		ecomp->number = -1;
+		put_device(cdev);
+		return ERR_PTR(err);
+	}
+
+	return ecomp;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_register);
+
+/**
+ * enclosure_add_device - add a device as being part of an enclosure
+ * @edev:	the enclosure device being added to.
+ * @component:	the number of the component
+ * @dev:	the device being added
+ *
+ * Declares a real device to reside in slot (or identifier) @component
+ * of an enclosure.  This will cause the relevant sysfs links to appear.
+ * This function may also be used to change a device associated with
+ * an enclosure without having to call enclosure_remove_device() in
+ * between.
+ *
+ * Returns zero on success or an error.
+ */
+int enclosure_add_device(struct enclosure_device *edev, int component,
+			 struct device *dev)
+{
+	struct enclosure_component *cdev;
+
+	/* reject indexes on either side of the component array */
+	if (!edev || component < 0 || component >= edev->components)
+		return -EINVAL;
+
+	cdev = &edev->component[component];
+
+	if (cdev->dev == dev)
+		return -EEXIST;
+
+	/* a different device sat here before: unlink it first */
+	if (cdev->dev)
+		enclosure_remove_links(cdev);
+
+	put_device(cdev->dev);
+	cdev->dev = get_device(dev);
+	return enclosure_add_links(cdev);
+}
+EXPORT_SYMBOL_GPL(enclosure_add_device);
+
+/**
+ * enclosure_remove_device - remove a device from an enclosure
+ * @edev:	the enclosure device
+ * @dev:	the device to detach from whichever component holds it
+ *
+ * Returns zero on success or an error; -ENODEV when no component of
+ * @edev currently holds @dev.
+ *
+ */
+int enclosure_remove_device(struct enclosure_device *edev, struct device *dev)
+{
+	struct enclosure_component *cdev;
+	int i;
+
+	if (!edev || !dev)
+		return -EINVAL;
+
+	for (i = 0; i < edev->components; i++) {
+		cdev = &edev->component[i];
+		if (cdev->dev == dev) {
+			enclosure_remove_links(cdev);
+			/* the component is deleted and added again, now empty */
+			device_del(&cdev->cdev);
+			put_device(dev);
+			cdev->dev = NULL;
+			return device_add(&cdev->cdev);
+		}
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(enclosure_remove_device);
+
+/*
+ * sysfs pieces below
+ */
+
+/* "components" attribute of the enclosure: number of component slots */
+static ssize_t enclosure_show_components(struct device *cdev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	return snprintf(buf, 40, "%d\n", edev->components);
+}
+
+static struct device_attribute enclosure_attrs[] = {
+	__ATTR(components, S_IRUGO, enclosure_show_components, NULL),
+	__ATTR_NULL
+};
+
+static struct class enclosure_class = {
+	.name			= "enclosure",
+	.owner			= THIS_MODULE,
+	.dev_release		= enclosure_release,
+	.dev_attrs		= enclosure_attrs,
+};
+
+/*
+ * Strings for the "status" attribute, indexed by status value.  The NULL
+ * stored at ENCLOSURE_STATUS_MAX ends the scan in set_component_status().
+ */
+static const char *const enclosure_status [] = {
+	[ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported",
+	[ENCLOSURE_STATUS_OK] = "OK",
+	[ENCLOSURE_STATUS_CRITICAL] = "critical",
+	[ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical",
+	[ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable",
+	[ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
+	[ENCLOSURE_STATUS_UNKNOWN] = "unknown",
+	[ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+	[ENCLOSURE_STATUS_MAX] = NULL,
+};
+
+/* strings for the "type" attribute, indexed by component type */
+static const char *const enclosure_type [] = {
+	[ENCLOSURE_COMPONENT_DEVICE] = "device",
+	[ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
+};
+
+/*
+ * The get_* handlers below first let the optional callback update the
+ * cached value in the component and then print that value; the set_*
+ * handlers parse the input and pass it to the optional callback.
+ */
+static ssize_t get_component_fault(struct device *cdev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_fault)
+		edev->cb->get_fault(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->fault);
+}
+
+static ssize_t set_component_fault(struct device *cdev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_fault)
+		edev->cb->set_fault(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_status(struct device *cdev,
+				    struct device_attribute *attr,char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_status)
+		edev->cb->get_status(edev, ecomp);
+	return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
+}
+
+/*
+ * Accepts exactly one of the enclosure_status[] strings, optionally
+ * followed by a newline.  Returns -EINVAL for any other input and also
+ * when the enclosure has no set_status callback.
+ */
+static ssize_t set_component_status(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int i;
+
+	for (i = 0; enclosure_status[i]; i++) {
+		size_t len = strlen(enclosure_status[i]);
+
+		if (strncmp(buf, enclosure_status[i], len) == 0 &&
+		    (buf[len] == '\n' || buf[len] == '\0'))
+			break;
+	}
+
+	if (enclosure_status[i] && edev->cb->set_status) {
+		edev->cb->set_status(edev, ecomp, i);
+		return count;
+	} else
+		return -EINVAL;
+}
+
+static ssize_t get_component_active(struct device *cdev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_active)
+		edev->cb->get_active(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->active);
+}
+
+static ssize_t set_component_active(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_active)
+		edev->cb->set_active(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_locate(struct device *cdev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_locate)
+		edev->cb->get_locate(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->locate);
+}
+
+static ssize_t set_component_locate(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_locate)
+		edev->cb->set_locate(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_type(struct device *cdev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]);
+}
+
+
+static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
+		    set_component_fault);
+static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
+		   set_component_status);
+static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
+		   set_component_active);
+static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
+		   set_component_locate);
+static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL);
+
+static struct attribute *enclosure_component_attrs[] = {
+	&dev_attr_fault.attr,
+	&dev_attr_status.attr,
+	&dev_attr_active.attr,
+	&dev_attr_locate.attr,
+	&dev_attr_type.attr,
+	NULL
+};
+
+static struct attribute_group enclosure_group = {
+	.attrs = enclosure_component_attrs,
+};
+
+static const struct attribute_group *enclosure_groups[] = {
+	&enclosure_group,
+	NULL
+};
+
+/* module init: the class is the only global object to set up */
+static int __init enclosure_init(void)
+{
+	return class_register(&enclosure_class);
+}
+
+static void __exit enclosure_exit(void)
+{
+	class_unregister(&enclosure_class);
+}
+
+module_init(enclosure_init);
+module_exit(enclosure_exit);
+
+MODULE_AUTHOR("James Bottomley");
+MODULE_DESCRIPTION("Enclosure Services");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/ep93xx_pwm.c b/drivers/misc/ep93xx_pwm.c
new file mode 100644
index 00000000000..ba4694169d7
--- /dev/null
+++ b/drivers/misc/ep93xx_pwm.c
@@ -0,0 +1,384 @@
+/*
+ * Simple PWM driver for EP93XX
+ *
+ * (c) Copyright 2009 Matthieu Crapet <mcrapet@gmail.com>
+ * (c) Copyright 2009 H Hartley Sweeten <hsweeten@visionengravers.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * EP9307 has only one channel:
+ *   - PWMOUT
+ *
+ * EP9301/02/12/15 have two channels:
+ *   - PWMOUT
+ *   - PWMOUT1 (alternate function for EGPIO14)
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <mach/platform.h>
+
+#define EP93XX_PWMx_TERM_COUNT	0x00
+#define EP93XX_PWMx_DUTY_CYCLE	0x04
+#define EP93XX_PWMx_ENABLE	0x08
+#define EP93XX_PWMx_INVERT	0x0C
+
+#define EP93XX_PWM_MAX_COUNT	0xFFFF
+
+struct ep93xx_pwm {
+	void __iomem	*mmio_base;
+	struct clk	*clk;
+	u32		duty_percent;	/* last duty cycle set, in percent */
+};
+
+/* raw register accessors, @off is one of the EP93XX_PWMx_* offsets */
+static inline void ep93xx_pwm_writel(struct ep93xx_pwm *pwm,
+		unsigned int val, unsigned int off)
+{
+	__raw_writel(val, pwm->mmio_base + off);
+}
+
+static inline unsigned int ep93xx_pwm_readl(struct ep93xx_pwm *pwm,
+		unsigned int off)
+{
+	return __raw_readl(pwm->mmio_base + off);
+}
+
+static inline void ep93xx_pwm_write_tc(struct ep93xx_pwm *pwm, u16 value)
+{
+	ep93xx_pwm_writel(pwm, value, EP93XX_PWMx_TERM_COUNT);
+}
+
+static inline u16 ep93xx_pwm_read_tc(struct ep93xx_pwm *pwm)
+{
+	return ep93xx_pwm_readl(pwm, EP93XX_PWMx_TERM_COUNT);
+}
+
+static inline void ep93xx_pwm_write_dc(struct ep93xx_pwm *pwm, u16 value)
+{
+	ep93xx_pwm_writel(pwm, value, EP93XX_PWMx_DUTY_CYCLE);
+}
+
+static inline void ep93xx_pwm_enable(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x1, EP93XX_PWMx_ENABLE);
+}
+
+static inline void ep93xx_pwm_disable(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x0, EP93XX_PWMx_ENABLE);
+}
+
+static inline int ep93xx_pwm_is_enabled(struct ep93xx_pwm *pwm)
+{
+	return ep93xx_pwm_readl(pwm, EP93XX_PWMx_ENABLE) & 0x1;
+}
+
+static inline void ep93xx_pwm_invert(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x1, EP93XX_PWMx_INVERT);
+}
+
+static inline void ep93xx_pwm_normal(struct ep93xx_pwm *pwm)
+{
+	ep93xx_pwm_writel(pwm, 0x0, EP93XX_PWMx_INVERT);
+}
+
+static inline int ep93xx_pwm_is_inverted(struct ep93xx_pwm *pwm)
+{
+	return ep93xx_pwm_readl(pwm, EP93XX_PWMx_INVERT) & 0x1;
+}
+
+/*
+ * /sys/devices/platform/ep93xx-pwm.N
+ *   /min_freq      read-only   minimum pwm output frequency
+ *   /max_freq      read-only   maximum pwm output frequency
+ *   /freq          read-write  pwm output frequency (0 = disable output)
+ *   /duty_percent  read-write  pwm duty cycle percent (1..99)
+ *   /invert        read-write  invert pwm output
+ */
+
+/* lowest frequency: clock rate divided by the largest period count */
+static ssize_t ep93xx_pwm_get_min_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	unsigned long rate = clk_get_rate(pwm->clk);
+
+	return sprintf(buf, "%lu\n", rate / (EP93XX_PWM_MAX_COUNT + 1));
+}
+
+/* highest frequency: half the clock rate */
+static ssize_t ep93xx_pwm_get_max_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	unsigned long rate = clk_get_rate(pwm->clk);
+
+	return sprintf(buf, "%lu\n", rate / 2);
+}
+
+/* prints the current frequency, or "disabled" when the output is off */
+static ssize_t ep93xx_pwm_get_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+
+	if (ep93xx_pwm_is_enabled(pwm)) {
+		unsigned long rate = clk_get_rate(pwm->clk);
+		u16 term = ep93xx_pwm_read_tc(pwm);
+
+		return sprintf(buf, "%lu\n", rate / (term + 1));
+	} else {
+		return sprintf(buf, "disabled\n");
+	}
+}
+
+/*
+ * 0 disables the output.  Any other value up to half the clock rate is
+ * turned into a terminal count (clamped to 1..EP93XX_PWM_MAX_COUNT), the
+ * duty register is rescaled to keep duty_percent, and the output is
+ * enabled.  Everything else is rejected with -EINVAL.
+ */
+static ssize_t ep93xx_pwm_set_freq(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	long val;
+	int err;
+
+	err = strict_strtol(buf, 10, &val);
+	if (err)
+		return -EINVAL;
+
+	if (val == 0) {
+		ep93xx_pwm_disable(pwm);
+	} else if (val <= (clk_get_rate(pwm->clk) / 2)) {
+		u32 term, duty;
+
+		val = (clk_get_rate(pwm->clk) / val) - 1;
+		if (val > EP93XX_PWM_MAX_COUNT)
+			val = EP93XX_PWM_MAX_COUNT;
+		if (val < 1)
+			val = 1;
+
+		term = ep93xx_pwm_read_tc(pwm);
+		duty = ((val + 1) * pwm->duty_percent / 100) - 1;
+
+		/* If pwm is running, order is important */
+		if (val > term) {
+			ep93xx_pwm_write_tc(pwm, val);
+			ep93xx_pwm_write_dc(pwm, duty);
+		} else {
+			ep93xx_pwm_write_dc(pwm, duty);
+			ep93xx_pwm_write_tc(pwm, val);
+		}
+
+		if (!ep93xx_pwm_is_enabled(pwm))
+			ep93xx_pwm_enable(pwm);
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+static ssize_t ep93xx_pwm_get_duty_percent(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+
+	return sprintf(buf, "%u\n", pwm->duty_percent);
+}
+
+/* accepts 1..99 and rescales the duty register against the current period */
+static ssize_t ep93xx_pwm_set_duty_percent(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	long val;
+	int err;
+
+	err = strict_strtol(buf, 10, &val);
+	if (err)
+		return -EINVAL;
+
+	if (val > 0 && val < 100) {
+		u32 term = ep93xx_pwm_read_tc(pwm);
+		ep93xx_pwm_write_dc(pwm, ((term + 1) * val / 100) - 1);
+		pwm->duty_percent = val;
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t ep93xx_pwm_get_invert(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+
+	return sprintf(buf, "%d\n", ep93xx_pwm_is_inverted(pwm));
+}
+
+/* 0 selects the normal output, 1 the inverted one; other values fail */
+static ssize_t ep93xx_pwm_set_invert(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	long val;
+	int err;
+
+	err = strict_strtol(buf, 10, &val);
+	if (err)
+		return -EINVAL;
+
+	if (val == 0)
+		ep93xx_pwm_normal(pwm);
+	else if (val == 1)
+		ep93xx_pwm_invert(pwm);
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+/*
+ * The writable attributes reprogram the hardware, so only the owner may
+ * write them; they are not world-writable.
+ */
+static DEVICE_ATTR(min_freq, S_IRUGO, ep93xx_pwm_get_min_freq, NULL);
+static DEVICE_ATTR(max_freq, S_IRUGO, ep93xx_pwm_get_max_freq, NULL);
+static DEVICE_ATTR(freq, S_IWUSR | S_IRUGO,
+		   ep93xx_pwm_get_freq, ep93xx_pwm_set_freq);
+static DEVICE_ATTR(duty_percent, S_IWUSR | S_IRUGO,
+		   ep93xx_pwm_get_duty_percent, ep93xx_pwm_set_duty_percent);
+static DEVICE_ATTR(invert, S_IWUSR | S_IRUGO,
+		   ep93xx_pwm_get_invert, ep93xx_pwm_set_invert);
+
+static struct attribute *ep93xx_pwm_attrs[] = {
+	&dev_attr_min_freq.attr,
+	&dev_attr_max_freq.attr,
+	&dev_attr_freq.attr,
+	&dev_attr_duty_percent.attr,
+	&dev_attr_invert.attr,
+	NULL
+};
+
+static const struct attribute_group ep93xx_pwm_sysfs_files = {
+	.attrs	= ep93xx_pwm_attrs,
+};
+
+/*
+ * Claim the gpio, map the registers, get the clock and program a disabled
+ * default setup.  The sysfs files are created last: their handlers fetch
+ * the driver data and the clock, so both must be in place before user
+ * space can reach them.
+ */
+static int __init ep93xx_pwm_probe(struct platform_device *pdev)
+{
+	struct ep93xx_pwm *pwm;
+	struct resource *res;
+	int err;
+
+	err = ep93xx_pwm_acquire_gpio(pdev);
+	if (err)
+		return err;
+
+	pwm = kzalloc(sizeof(struct ep93xx_pwm), GFP_KERNEL);
+	if (!pwm) {
+		err = -ENOMEM;
+		goto fail_no_mem;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		err = -ENXIO;
+		goto fail_no_mem_resource;
+	}
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (res == NULL) {
+		err = -EBUSY;
+		goto fail_no_mem_resource;
+	}
+
+	pwm->mmio_base = ioremap(res->start, resource_size(res));
+	if (pwm->mmio_base == NULL) {
+		err = -ENXIO;
+		goto fail_no_ioremap;
+	}
+
+	pwm->clk = clk_get(&pdev->dev, "pwm_clk");
+	if (IS_ERR(pwm->clk)) {
+		err = PTR_ERR(pwm->clk);
+		goto fail_no_clk;
+	}
+
+	pwm->duty_percent = 50;
+
+	platform_set_drvdata(pdev, pwm);
+
+	/* disable pwm at startup. Avoids zero value. */
+	ep93xx_pwm_disable(pwm);
+	ep93xx_pwm_write_tc(pwm, EP93XX_PWM_MAX_COUNT);
+	ep93xx_pwm_write_dc(pwm, EP93XX_PWM_MAX_COUNT / 2);
+
+	clk_enable(pwm->clk);
+
+	err = sysfs_create_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
+	if (err)
+		goto fail_no_sysfs;
+
+	return 0;
+
+fail_no_sysfs:
+	clk_disable(pwm->clk);
+	platform_set_drvdata(pdev, NULL);
+	clk_put(pwm->clk);
+fail_no_clk:
+	iounmap(pwm->mmio_base);
+fail_no_ioremap:
+	release_mem_region(res->start, resource_size(res));
+fail_no_mem_resource:
+	kfree(pwm);
+fail_no_mem:
+	ep93xx_pwm_release_gpio(pdev);
+	return err;
+}
+
+/* stop the output and release everything taken in ep93xx_pwm_probe() */
+static int __exit ep93xx_pwm_remove(struct platform_device *pdev)
+{
+	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	ep93xx_pwm_disable(pwm);
+	clk_disable(pwm->clk);
+	clk_put(pwm->clk);
+	platform_set_drvdata(pdev, NULL);
+	sysfs_remove_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
+	iounmap(pwm->mmio_base);
+	release_mem_region(res->start, resource_size(res));
+	kfree(pwm);
+	ep93xx_pwm_release_gpio(pdev);
+
+	return 0;
+}
+
+static struct platform_driver ep93xx_pwm_driver = {
+	.driver		= {
+		.name	= "ep93xx-pwm",
+		.owner	= THIS_MODULE,
+	},
+	.remove		= __exit_p(ep93xx_pwm_remove),
+};
+
+static int __init ep93xx_pwm_init(void)
+{
+	return platform_driver_probe(&ep93xx_pwm_driver, ep93xx_pwm_probe);
+}
+
+static void __exit ep93xx_pwm_exit(void)
+{
+	platform_driver_unregister(&ep93xx_pwm_driver);
+}
+
+module_init(ep93xx_pwm_init);
+module_exit(ep93xx_pwm_exit);
+
+MODULE_AUTHOR("Matthieu Crapet <mcrapet@gmail.com>, "
+	      "H Hartley Sweeten <hsweeten@visionengravers.com>");
+MODULE_DESCRIPTION("EP93xx PWM driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ep93xx-pwm");
diff --git a/drivers/misc/hdpuftrs/Makefile b/drivers/misc/hdpuftrs/Makefile
new file mode 100644
index 00000000000..ac74ae67923
--- /dev/null
+++ b/drivers/misc/hdpuftrs/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_HDPU_FEATURES) := hdpu_cpustate.o hdpu_nexus.o
diff --git a/drivers/misc/hdpuftrs/hdpu_cpustate.c
b/drivers/misc/hdpuftrs/hdpu_cpustate.c
new file mode 100644
index 00000000000..176fe4e09d3
--- /dev/null
+++ b/drivers/misc/hdpuftrs/hdpu_cpustate.c
@@ -0,0 +1,256 @@
+/*
+ * Sky CPU State Driver
+ *
+ * Copyright (C) 2002 Brian Waite
+ *
+ * This driver allows use of the CPU state bits
+ * It exports the /dev/sky_cpustate and also
+ * /proc/sky_cpustate pseudo-file for status information.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/hdpu_features.h>
+#include <linux/platform_device.h>
+#include <asm/uaccess.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+
+#define SKY_CPUSTATE_VERSION		"1.1"
+
+static int hdpu_cpustate_probe(struct platform_device *pdev);
+static int hdpu_cpustate_remove(struct platform_device *pdev);
+
+static unsigned char cpustate_get_state(void);
+static int cpustate_proc_open(struct inode *inode, struct file *file);
+static int cpustate_proc_read(struct seq_file *seq, void *offset);
+
+/* the single driver instance: lock, open bookkeeping, cached value, addresses */
+static struct cpustate_t cpustate;
+
+static const struct file_operations proc_cpustate = {
+	.open = cpustate_proc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static int cpustate_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cpustate_proc_read, NULL);
+}
+
+/* /proc/sky_cpustate: the cached state as a hexadecimal number */
+static int cpustate_proc_read(struct seq_file *seq, void *offset)
+{
+	seq_printf(seq, "CPU State: %04x\n", cpustate_get_state());
+	return 0;
+}
+
+/*
+ * Account for one more opener.  Fails with -EBUSY while an exclusive
+ * opener exists, or when @excl is requested and the device is already
+ * open.  Returns 0 on success.
+ */
+static int cpustate_get_ref(int excl)
+{
+
+	int retval = -EBUSY;
+
+	spin_lock(&cpustate.lock);
+
+	if (cpustate.excl)
+		goto out_busy;
+
+	if (excl) {
+		if (cpustate.open_count)
+			goto out_busy;
+		cpustate.excl = 1;
+	}
+
+	cpustate.open_count++;
+	retval = 0;
+
+      out_busy:
+	spin_unlock(&cpustate.lock);
+	return retval;
+}
+
+/* drop one opener; this also ends any exclusive claim */
+static int cpustate_free_ref(void)
+{
+
+	spin_lock(&cpustate.lock);
+
+	cpustate.excl = 0;
+	cpustate.open_count--;
+
+	spin_unlock(&cpustate.lock);
+	return 0;
+}
+
+/* returns the value last written through cpustate_set_state() */
+static unsigned char cpustate_get_state(void)
+{
+
+	return cpustate.cached_val;
+}
+
+/*
+ * Cache @new_state and push it out: all eight bits (21..28) are written
+ * to the clear address first, then the new value to the set address.
+ */
+static void cpustate_set_state(unsigned char new_state)
+{
+	unsigned int state = (new_state << 21);
+
+#ifdef DEBUG_CPUSTATE
+	printk("CPUSTATE -> 0x%x\n", new_state);
+#endif
+	spin_lock(&cpustate.lock);
+	cpustate.cached_val = new_state;
+	writel((0xff << 21), cpustate.clr_addr);
+	writel(state, cpustate.set_addr);
+	spin_unlock(&cpustate.lock);
+}
+
+/*
+ *	Now all the various file operations that we export.
+ */
+
+/* hands the single cached state byte to user space */
+static ssize_t cpustate_read(struct file *file, char __user *buf,
+			     size_t count, loff_t * ppos)
+{
+	unsigned char data;
+
+	if (count == 0)
+		return 0;
+
+	data = cpustate_get_state();
+	if (copy_to_user(buf, &data, sizeof(unsigned char)))
+		return -EFAULT;
+	return sizeof(unsigned char);
+}
+
+/* only the first byte of the user buffer is consumed and applied */
+static ssize_t cpustate_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t * ppos)
+{
+	unsigned char data;
+
+	if (count == 0)
+		return 0;
+
+	if (copy_from_user((unsigned char *)&data, buf, sizeof(unsigned char)))
+		return -EFAULT;
+
+	cpustate_set_state(data);
+	return sizeof(unsigned char);
+}
+
+static int cpustate_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	lock_kernel();
+	ret = cpustate_get_ref((file->f_flags & O_EXCL));
+	unlock_kernel();
+
+	return ret;
+}
+
+static int cpustate_release(struct inode *inode, struct file *file)
+{
+	return cpustate_free_ref();
+}
+
+static struct platform_driver hdpu_cpustate_driver = {
+	.probe = hdpu_cpustate_probe,
+	.remove = hdpu_cpustate_remove,
+	.driver = {
+		.name = HDPU_CPUSTATE_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+/*
+ *	The various file operations we support.
+ */
+static const struct file_operations cpustate_fops = {
+	.owner		= THIS_MODULE,
+	.open		= cpustate_open,
+	.release	= cpustate_release,
+	.read		= cpustate_read,
+	.write		= cpustate_write,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice cpustate_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "sky_cpustate",
+	.fops	= &cpustate_fops,
+};
+
+/*
+ * Take the set/clear addresses from the platform resource and register
+ * the misc device and the proc entry.  A missing proc entry is only
+ * reported, it does not fail the probe.
+ */
+static int hdpu_cpustate_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct proc_dir_entry *proc_de;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		printk(KERN_ERR "sky_cpustate: "
+		       "Invalid memory resource.\n");
+		return -EINVAL;
+	}
+
+	/* the lock must be initialised before any file operation can run */
+	spin_lock_init(&cpustate.lock);
+
+	/*
+	 * NOTE(review): the addresses are used without ioremap(), and the
+	 * cast binds before the "- 1", so clr_addr ends up one unsigned
+	 * long below res->end.  Left as found; confirm against the
+	 * platform code that provides the resource.
+	 */
+	cpustate.set_addr = (unsigned long *)res->start;
+	cpustate.clr_addr = (unsigned long *)res->end - 1;
+
+	ret = misc_register(&cpustate_dev);
+	if (ret) {
+		printk(KERN_WARNING "sky_cpustate: "
+		       "Unable to register misc device.\n");
+		cpustate.set_addr = NULL;
+		cpustate.clr_addr = NULL;
+		return ret;
+	}
+
+	proc_de = proc_create("sky_cpustate", 0666, NULL, &proc_cpustate);
+	if (!proc_de) {
+		printk(KERN_WARNING "sky_cpustate: "
+		       "Unable to create proc entry\n");
+	}
+
+	printk(KERN_INFO "Sky CPU State Driver v" SKY_CPUSTATE_VERSION "\n");
+	return 0;
+}
+
+static int hdpu_cpustate_remove(struct platform_device *pdev)
+{
+	cpustate.set_addr = NULL;
+	cpustate.clr_addr = NULL;
+
+	remove_proc_entry("sky_cpustate", NULL);
+	misc_deregister(&cpustate_dev);
+
+	return 0;
+}
+
+static int __init cpustate_init(void)
+{
+	return platform_driver_register(&hdpu_cpustate_driver);
+}
+
+static void __exit cpustate_exit(void)
+{
+	platform_driver_unregister(&hdpu_cpustate_driver);
+}
+
+module_init(cpustate_init);
+module_exit(cpustate_exit);
+
+MODULE_AUTHOR("Brian Waite");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" HDPU_CPUSTATE_NAME);
diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c new
file mode 100644
index 00000000000..ce39fa54949
--- /dev/null
+++ b/drivers/misc/hdpuftrs/hdpu_nexus.c
@@ -0,0 +1,149 @@
+/*
+ * Sky Nexus Register Driver
+ *
+ * Copyright (C) 2002 Brian Waite
+ *
+ * This driver allows reading the Nexus register
+ * It exports the /proc/sky_chassis_id and also
+ * /proc/sky_slot_id pseudo-file for status information.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/hdpu_features.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+
+static int hdpu_nexus_probe(struct platform_device *pdev);
+static int hdpu_nexus_remove(struct platform_device *pdev);
+static int hdpu_slot_id_open(struct inode *inode, struct file *file);
+static int hdpu_slot_id_read(struct seq_file *seq, void *offset);
+static int hdpu_chassis_id_open(struct inode *inode, struct file *file);
+static int hdpu_chassis_id_read(struct seq_file *seq, void *offset);
+
+static struct proc_dir_entry *hdpu_slot_id;
+static struct proc_dir_entry *hdpu_chassis_id;
+/* both ids stay at -1 until the register has been read in probe */
+static int slot_id = -1;
+static int chassis_id = -1;
+
+static const struct file_operations proc_slot_id = {
+	.open = hdpu_slot_id_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static const struct file_operations proc_chassis_id = {
+	.open = hdpu_chassis_id_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static struct platform_driver hdpu_nexus_driver = {
+	.probe = hdpu_nexus_probe,
+	.remove = hdpu_nexus_remove,
+	.driver = {
+		.name = HDPU_NEXUS_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int hdpu_slot_id_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hdpu_slot_id_read, NULL);
+}
+
+/* /proc/sky_slot_id: the slot id as a decimal number */
+static int hdpu_slot_id_read(struct seq_file *seq, void *offset)
+{
+	seq_printf(seq, "%d\n", slot_id);
+	return 0;
+}
+
+static int hdpu_chassis_id_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hdpu_chassis_id_read, NULL);
+}
+
+/* /proc/sky_chassis_id: the chassis id as a decimal number */
+static int hdpu_chassis_id_read(struct seq_file *seq, void *offset)
+{
+	seq_printf(seq, "%d\n", chassis_id);
+	return 0;
+}
+
+/*
+ * Read the Nexus register once, split it into slot id (bits 8..12) and
+ * chassis id (bits 0..7) and publish both through /proc.  Mapping or
+ * proc failures are only reported; the probe still succeeds.
+ */
+static int hdpu_nexus_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int *nexus_id_addr;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		printk(KERN_ERR "sky_nexus: "
+		       "Invalid memory resource.\n");
+		return -EINVAL;
+	}
+	/* res->end is the last valid byte, so the length is end - start + 1 */
+	nexus_id_addr = ioremap(res->start, resource_size(res));
+	if (nexus_id_addr) {
+		slot_id = (*nexus_id_addr >> 8) & 0x1f;
+		chassis_id = *nexus_id_addr & 0xff;
+		iounmap(nexus_id_addr);
+	} else {
+		printk(KERN_ERR "sky_nexus: Could not map slot id\n");
+	}
+
+	hdpu_slot_id = proc_create("sky_slot_id", 0666, NULL, &proc_slot_id);
+	if (!hdpu_slot_id) {
+		printk(KERN_WARNING "sky_nexus: "
+		       "Unable to create proc dir entry: sky_slot_id\n");
+	}
+
+	hdpu_chassis_id = proc_create("sky_chassis_id", 0666, NULL,
+				      &proc_chassis_id);
+	if (!hdpu_chassis_id)
+		printk(KERN_WARNING "sky_nexus: "
+		       "Unable to create proc dir entry: sky_chassis_id\n");
+
+	return 0;
+}
+
+/* forget the ids and take both proc entries down again */
+static int hdpu_nexus_remove(struct platform_device *pdev)
+{
+	slot_id = -1;
+	chassis_id = -1;
+
+	remove_proc_entry("sky_slot_id", NULL);
+	remove_proc_entry("sky_chassis_id", NULL);
+
+	hdpu_slot_id = NULL;
+	hdpu_chassis_id = NULL;
+
+	return 0;
+}
+
+static int __init nexus_init(void)
+{
+	return platform_driver_register(&hdpu_nexus_driver);
+}
+
+static void __exit nexus_exit(void)
+{
+	platform_driver_unregister(&hdpu_nexus_driver);
+}
+
+module_init(nexus_init);
+module_exit(nexus_exit);
+
+MODULE_AUTHOR("Brian Waite");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" HDPU_NEXUS_NAME); diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c new file mode 100644 index 00000000000..a92a3a742b4 --- /dev/null +++ b/drivers/misc/hpilo.c @@ -0,0 +1,886 @@ +/* + * Driver for HP iLO/iLO2 management processor. + * + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * David Altobelli <david.altobelli@hp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/cdev.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include "hpilo.h" + +static struct class *ilo_class; +static unsigned int ilo_major; +static char ilo_hwdev[MAX_ILO_DEV]; + +static inline int get_entry_id(int entry) +{ + return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR; +} + +static inline int get_entry_len(int entry) +{ + return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3; +} + +static inline int mk_entry(int id, int len) +{ + int qlen = len & 7 ? (len >> 3) + 1 : len >> 3; + return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS; +} + +static inline int desc_mem_sz(int nr_entry) +{ + return nr_entry << L2_QENTRY_SZ; +} + +/* + * FIFO queues, shared with hardware. + * + * If a queue has empty slots, an entry is added to the queue tail, + * and that entry is marked as occupied. + * Entries can be dequeued from the head of the list, when the device + * has marked the entry as consumed. + * + * Returns true on successful queue/dequeue, false on failure. 
+ */ +static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&hw->fifo_lock, flags); + if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask] + & ENTRY_MASK_O)) { + fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |= + (entry & ENTRY_MASK_NOSTATE) | fifo_q->merge; + fifo_q->tail += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) { + if (entry) + *entry = c & ENTRY_MASK_NOSTATE; + + fifo_q->fifobar[fifo_q->head & fifo_q->imask] = + (c | ENTRY_MASK) + 1; + fifo_q->head += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) + ret = 1; + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int id, int len) +{ + char *fifobar; + int entry; + + if (dir == SENDQ) + fifobar = ccb->ccb_u1.send_fifobar; + else + fifobar = ccb->ccb_u3.recv_fifobar; + + entry = mk_entry(id, len); + return fifo_enqueue(hw, fifobar, entry); +} + +static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int *id, int *len, void **pkt) +{ + char *fifobar, *desc; + int entry = 0, pkt_id = 0; + int ret; + + if (dir == SENDQ) { + fifobar = ccb->ccb_u1.send_fifobar; + desc = ccb->ccb_u2.send_desc; + } else { + 
fifobar = ccb->ccb_u3.recv_fifobar; + desc = ccb->ccb_u4.recv_desc; + } + + ret = fifo_dequeue(hw, fifobar, &entry); + if (ret) { + pkt_id = get_entry_id(entry); + if (id) + *id = pkt_id; + if (len) + *len = get_entry_len(entry); + if (pkt) + *pkt = (void *)(desc + desc_mem_sz(pkt_id)); + } + + return ret; +} + +static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb) +{ + char *fifobar = ccb->ccb_u3.recv_fifobar; + + return fifo_check_recv(hw, fifobar); +} + +static inline void doorbell_set(struct ccb *ccb) +{ + iowrite8(1, ccb->ccb_u5.db_base); +} + +static inline void doorbell_clr(struct ccb *ccb) +{ + iowrite8(2, ccb->ccb_u5.db_base); +} + +static inline int ctrl_set(int l2sz, int idxmask, int desclim) +{ + int active = 0, go = 1; + return l2sz << CTRL_BITPOS_L2SZ | + idxmask << CTRL_BITPOS_FIFOINDEXMASK | + desclim << CTRL_BITPOS_DESCLIMIT | + active << CTRL_BITPOS_A | + go << CTRL_BITPOS_G; +} + +static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz) +{ + /* for simplicity, use the same parameters for send and recv ctrls */ + ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); + ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); +} + +static inline int fifo_sz(int nr_entry) +{ + /* size of a fifo is determined by the number of entries it contains */ + return (nr_entry * sizeof(u64)) + FIFOHANDLESIZE; +} + +static void fifo_setup(void *base_addr, int nr_entry) +{ + struct fifo *fifo_q = base_addr; + int i; + + /* set up an empty fifo */ + fifo_q->head = 0; + fifo_q->tail = 0; + fifo_q->reset = 0; + fifo_q->nrents = nr_entry; + fifo_q->imask = nr_entry - 1; + fifo_q->merge = ENTRY_MASK_O; + + for (i = 0; i < nr_entry; i++) + fifo_q->fifobar[i] = 0; +} + +static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data) +{ + struct ccb *driver_ccb = &data->driver_ccb; + struct ccb __iomem *device_ccb = data->mapped_ccb; + int retries; + + /* complicated dance to tell the hw we are stopping */ + doorbell_clr(driver_ccb); 
+ iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->send_ctrl); + iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->recv_ctrl); + + /* give iLO some time to process stop request */ + for (retries = MAX_WAIT; retries > 0; retries--) { + doorbell_set(driver_ccb); + udelay(WAIT_TIME); + if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A)) + && + !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A))) + break; + } + if (retries == 0) + dev_err(&pdev->dev, "Closing, but controller still active\n"); + + /* clear the hw ccb */ + memset_io(device_ccb, 0, sizeof(struct ccb)); + + /* free resources used to back send/recv queues */ + pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa); +} + +static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + char *dma_va, *dma_pa; + struct ccb *driver_ccb, *ilo_ccb; + + driver_ccb = &data->driver_ccb; + ilo_ccb = &data->ilo_ccb; + + data->dma_size = 2 * fifo_sz(NR_QENTRY) + + 2 * desc_mem_sz(NR_QENTRY) + + ILO_START_ALIGN + ILO_CACHE_SZ; + + data->dma_va = pci_alloc_consistent(hw->ilo_dev, data->dma_size, + &data->dma_pa); + if (!data->dma_va) + return -ENOMEM; + + dma_va = (char *)data->dma_va; + dma_pa = (char *)data->dma_pa; + + memset(dma_va, 0, data->dma_size); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN); + dma_pa = (char *)roundup((unsigned long)dma_pa, ILO_START_ALIGN); + + /* + * Create two ccb's, one with virt addrs, one with phys addrs. + * Copy the phys addr ccb to device shared mem. 
+ */ + ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ); + ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u1.send_fifobar = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ); + dma_pa = (char *)roundup((unsigned long)dma_pa, ILO_CACHE_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u3.recv_fifobar = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + driver_ccb->ccb_u2.send_desc = dma_va; + ilo_ccb->ccb_u2.send_desc = dma_pa; + dma_pa += desc_mem_sz(NR_QENTRY); + dma_va += desc_mem_sz(NR_QENTRY); + + driver_ccb->ccb_u4.recv_desc = dma_va; + ilo_ccb->ccb_u4.recv_desc = dma_pa; + + driver_ccb->channel = slot; + ilo_ccb->channel = slot; + + driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE); + ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */ + + return 0; +} + +static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + int pkt_id, pkt_sz; + struct ccb *driver_ccb = &data->driver_ccb; + + /* copy the ccb with physical addrs to device memory */ + data->mapped_ccb = (struct ccb __iomem *) + (hw->ram_vaddr + (slot * ILOHW_CCB_SZ)); + memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb)); + + /* put packets on the send and receive queues */ + pkt_sz = 0; + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) { + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz); + doorbell_set(driver_ccb); + } + + pkt_sz = desc_mem_sz(1); + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz); + + /* the ccb is ready to use */ + doorbell_clr(driver_ccb); +} + +static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data) +{ + int pkt_id, i; + 
struct ccb *driver_ccb = &data->driver_ccb; + + /* make sure iLO is really handling requests */ + for (i = MAX_WAIT; i > 0; i--) { + if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL)) + break; + udelay(WAIT_TIME); + } + + if (i == 0) { + dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n"); + return -EBUSY; + } + + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0); + doorbell_set(driver_ccb); + return 0; +} + +static inline int is_channel_reset(struct ccb *ccb) +{ + /* check for this particular channel needing a reset */ + return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset; +} + +static inline void set_channel_reset(struct ccb *ccb) +{ + /* set a flag indicating this channel needs a reset */ + FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1; +} + +static inline int get_device_outbound(struct ilo_hwinfo *hw) +{ + return ioread32(&hw->mmio_vaddr[DB_OUT]); +} + +static inline int is_db_reset(int db_out) +{ + return db_out & (1 << DB_RESET); +} + +static inline int is_device_reset(struct ilo_hwinfo *hw) +{ + /* check for global reset condition */ + return is_db_reset(get_device_outbound(hw)); +} + +static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr) +{ + iowrite32(clr, &hw->mmio_vaddr[DB_OUT]); +} + +static inline void clear_device(struct ilo_hwinfo *hw) +{ + /* clear the device (reset bits, pending channel entries) */ + clear_pending_db(hw, -1); +} + +static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]); +} + +static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1, + &hw->mmio_vaddr[DB_IRQ]); +} + +static void ilo_set_reset(struct ilo_hwinfo *hw) +{ + int slot; + + /* + * Mapped memory is zeroed on ilo reset, so set a per ccb flag + * to indicate that this ccb needs to be closed and reopened. 
+ */ + for (slot = 0; slot < MAX_CCB; slot++) { + if (!hw->ccb_alloc[slot]) + continue; + set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb); + } +} + +static ssize_t ilo_read(struct file *fp, char __user *buf, + size_t len, loff_t *off) +{ + int err, found, cnt, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) { + /* + * If the device has been reset, applications + * need to close and reopen all ccbs. + */ + return -ENODEV; + } + + /* + * This function is to be called when data is expected + * in the channel, and will return an error if no packet is found + * during the loop below. The sleep/retry logic is to allow + * applications to call read() immediately post write(), + * and give iLO some time to process the sent packet. + */ + cnt = 20; + do { + /* look for a received packet */ + found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id, + &pkt_len, &pkt); + if (found) + break; + cnt--; + msleep(100); + } while (!found && cnt); + + if (!found) + return -EAGAIN; + + /* only copy the length of the received packet */ + if (pkt_len < len) + len = pkt_len; + + err = copy_to_user(buf, pkt, len); + + /* return the received packet to the queue */ + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1)); + + return err ? 
-EFAULT : len; +} + +static ssize_t ilo_write(struct file *fp, const char __user *buf, + size_t len, loff_t *off) +{ + int err, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) + return -ENODEV; + + /* get a packet to send the user command */ + if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt)) + return -EBUSY; + + /* limit the length to the length of the packet */ + if (pkt_len < len) + len = pkt_len; + + /* on failure, set the len to 0 to return empty packet to the device */ + err = copy_from_user(pkt, buf, len); + if (err) + len = 0; + + /* send the packet */ + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len); + doorbell_set(driver_ccb); + + return err ? -EFAULT : len; +} + +static unsigned int ilo_poll(struct file *fp, poll_table *wait) +{ + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + + poll_wait(fp, &data->ccb_waitq, wait); + + if (is_channel_reset(driver_ccb)) + return POLLERR; + else if (ilo_pkt_recv(data->ilo_hw, driver_ccb)) + return POLLIN | POLLRDNORM; + + return 0; +} + +static int ilo_close(struct inode *ip, struct file *fp) +{ + int slot; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % MAX_CCB; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + spin_lock(&hw->open_lock); + + if (hw->ccb_alloc[slot]->ccb_cnt == 1) { + + data = fp->private_data; + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + } else + hw->ccb_alloc[slot]->ccb_cnt--; + + spin_unlock(&hw->open_lock); + + return 0; +} + +static int ilo_open(struct inode *ip, struct file *fp) +{ + int slot, error; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = 
iminor(ip) % MAX_CCB; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + /* new ccb allocation */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock(&hw->open_lock); + + /* each fd private_data holds sw/hw view of ccb */ + if (hw->ccb_alloc[slot] == NULL) { + /* create a channel control block for this minor */ + error = ilo_ccb_setup(hw, data, slot); + if (error) { + kfree(data); + goto out; + } + + data->ccb_cnt = 1; + data->ccb_excl = fp->f_flags & O_EXCL; + data->ilo_hw = hw; + init_waitqueue_head(&data->ccb_waitq); + + /* write the ccb to hw */ + spin_lock_irqsave(&hw->alloc_lock, flags); + ilo_ccb_open(hw, data, slot); + hw->ccb_alloc[slot] = data; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + /* make sure the channel is functional */ + error = ilo_ccb_verify(hw, data); + if (error) { + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + goto out; + } + + } else { + kfree(data); + if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) { + /* + * The channel exists, and either this open + * or a previous open of this channel wants + * exclusive access. 
+ */ + error = -EBUSY; + } else { + hw->ccb_alloc[slot]->ccb_cnt++; + error = 0; + } + } +out: + spin_unlock(&hw->open_lock); + + if (!error) + fp->private_data = hw->ccb_alloc[slot]; + + return error; +} + +static const struct file_operations ilo_fops = { + .owner = THIS_MODULE, + .read = ilo_read, + .write = ilo_write, + .poll = ilo_poll, + .open = ilo_open, + .release = ilo_close, +}; + +static irqreturn_t ilo_isr(int irq, void *data) +{ + struct ilo_hwinfo *hw = data; + int pending, i; + + spin_lock(&hw->alloc_lock); + + /* check for ccbs which have data */ + pending = get_device_outbound(hw); + if (!pending) { + spin_unlock(&hw->alloc_lock); + return IRQ_NONE; + } + + if (is_db_reset(pending)) { + /* wake up all ccbs if the device was reset */ + pending = -1; + ilo_set_reset(hw); + } + + for (i = 0; i < MAX_CCB; i++) { + if (!hw->ccb_alloc[i]) + continue; + if (pending & (1 << i)) + wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq); + } + + /* clear the device of the channels that have been handled */ + clear_pending_db(hw, pending); + + spin_unlock(&hw->alloc_lock); + + return IRQ_HANDLED; +} + +static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + pci_iounmap(pdev, hw->db_vaddr); + pci_iounmap(pdev, hw->ram_vaddr); + pci_iounmap(pdev, hw->mmio_vaddr); +} + +static int __devinit ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + int error = -ENOMEM; + + /* map the memory mapped i/o registers */ + hw->mmio_vaddr = pci_iomap(pdev, 1, 0); + if (hw->mmio_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping mmio\n"); + goto out; + } + + /* map the adapter shared memory region */ + hw->ram_vaddr = pci_iomap(pdev, 2, MAX_CCB * ILOHW_CCB_SZ); + if (hw->ram_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping shared mem\n"); + goto mmio_free; + } + + /* map the doorbell aperture */ + hw->db_vaddr = pci_iomap(pdev, 3, MAX_CCB * ONE_DB_SIZE); + if (hw->db_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping doorbell\n"); + 
goto ram_free; + } + + return 0; +ram_free: + pci_iounmap(pdev, hw->ram_vaddr); +mmio_free: + pci_iounmap(pdev, hw->mmio_vaddr); +out: + return error; +} + +static void ilo_remove(struct pci_dev *pdev) +{ + int i, minor; + struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev); + + clear_device(ilo_hw); + + minor = MINOR(ilo_hw->cdev.dev); + for (i = minor; i < minor + MAX_CCB; i++) + device_destroy(ilo_class, MKDEV(ilo_major, i)); + + cdev_del(&ilo_hw->cdev); + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); + ilo_unmap_device(pdev, ilo_hw); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(ilo_hw); + ilo_hwdev[(minor / MAX_CCB)] = 0; +} + +static int __devinit ilo_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int devnum, minor, start, error; + struct ilo_hwinfo *ilo_hw; + + /* find a free range for device files */ + for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) { + if (ilo_hwdev[devnum] == 0) { + ilo_hwdev[devnum] = 1; + break; + } + } + + if (devnum == MAX_ILO_DEV) { + dev_err(&pdev->dev, "Error finding free device\n"); + return -ENODEV; + } + + /* track global allocations for this device */ + error = -ENOMEM; + ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL); + if (!ilo_hw) + goto out; + + ilo_hw->ilo_dev = pdev; + spin_lock_init(&ilo_hw->alloc_lock); + spin_lock_init(&ilo_hw->fifo_lock); + spin_lock_init(&ilo_hw->open_lock); + + error = pci_enable_device(pdev); + if (error) + goto free; + + pci_set_master(pdev); + + error = pci_request_regions(pdev, ILO_NAME); + if (error) + goto disable; + + error = ilo_map_device(pdev, ilo_hw); + if (error) + goto free_regions; + + pci_set_drvdata(pdev, ilo_hw); + clear_device(ilo_hw); + + error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw); + if (error) + goto unmap; + + ilo_enable_interrupts(ilo_hw); + + cdev_init(&ilo_hw->cdev, &ilo_fops); + ilo_hw->cdev.owner = THIS_MODULE; + start = devnum * MAX_CCB; + error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, 
start), MAX_CCB); + if (error) { + dev_err(&pdev->dev, "Could not add cdev\n"); + goto remove_isr; + } + + for (minor = 0 ; minor < MAX_CCB; minor++) { + struct device *dev; + dev = device_create(ilo_class, &pdev->dev, + MKDEV(ilo_major, minor), NULL, + "hpilo!d%dccb%d", devnum, minor); + if (IS_ERR(dev)) + dev_err(&pdev->dev, "Could not create files\n"); + } + + return 0; +remove_isr: + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); +unmap: + ilo_unmap_device(pdev, ilo_hw); +free_regions: + pci_release_regions(pdev); +disable: + pci_disable_device(pdev); +free: + kfree(ilo_hw); +out: + ilo_hwdev[devnum] = 0; + return error; +} + +static struct pci_device_id ilo_devices[] = { + { PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) }, + { PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ilo_devices); + +static struct pci_driver ilo_driver = { + .name = ILO_NAME, + .id_table = ilo_devices, + .probe = ilo_probe, + .remove = __devexit_p(ilo_remove), +}; + +static int __init ilo_init(void) +{ + int error; + dev_t dev; + + ilo_class = class_create(THIS_MODULE, "iLO"); + if (IS_ERR(ilo_class)) { + error = PTR_ERR(ilo_class); + goto out; + } + + error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME); + if (error) + goto class_destroy; + + ilo_major = MAJOR(dev); + + error = pci_register_driver(&ilo_driver); + if (error) + goto chr_remove; + + return 0; +chr_remove: + unregister_chrdev_region(dev, MAX_OPEN); +class_destroy: + class_destroy(ilo_class); +out: + return error; +} + +static void __exit ilo_exit(void) +{ + pci_unregister_driver(&ilo_driver); + unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN); + class_destroy(ilo_class); +} + +MODULE_VERSION("1.2"); +MODULE_ALIAS(ILO_NAME); +MODULE_DESCRIPTION(ILO_NAME); +MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>"); +MODULE_LICENSE("GPL v2"); + +module_init(ilo_init); +module_exit(ilo_exit); diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h new file mode 100644 index 
00000000000..247eb386a97 --- /dev/null +++ b/drivers/misc/hpilo.h @@ -0,0 +1,212 @@
/*
 * linux/drivers/misc/hpilo.h
 *
 * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
 *	David Altobelli <david.altobelli@hp.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#ifndef __HPILO_H
#define __HPILO_H

#define ILO_NAME "hpilo"

/* max number of open channel control blocks per device, hw limited to 32 */
#define MAX_CCB		8
/* max number of supported devices */
#define MAX_ILO_DEV	1
/* max number of files */
#define MAX_OPEN	(MAX_CCB * MAX_ILO_DEV)
/* total wait time in usec */
#define MAX_WAIT_TIME	10000
/* per spin wait time in usec */
#define WAIT_TIME	10
/* spin counter for open/close delay */
#define MAX_WAIT	(MAX_WAIT_TIME / WAIT_TIME)

/*
 * Per device, used to track global memory allocations.
 */
struct ilo_hwinfo {
	/* mmio registers on device */
	char __iomem *mmio_vaddr;

	/* doorbell registers on device */
	char __iomem *db_vaddr;

	/* shared memory on device used for channel control blocks */
	char __iomem *ram_vaddr;

	/* files corresponding to this device */
	struct ccb_data *ccb_alloc[MAX_CCB];

	struct pci_dev *ilo_dev;

	/*
	 * open_lock      serializes ccb_cnt during open and close
	 * [ irq disabled ]
	 * -> alloc_lock  used when adding/removing/searching ccb_alloc,
	 *                which represents all ccbs open on the device
	 * --> fifo_lock  controls access to fifo queues shared with hw
	 *
	 * Locks must be taken in this order, but open_lock and alloc_lock
	 * are optional, they do not need to be held in order to take a
	 * lower level lock.
	 */
	spinlock_t open_lock;
	spinlock_t alloc_lock;
	spinlock_t fifo_lock;

	struct cdev cdev;
};

/* offset from mmio_vaddr for enabling doorbell interrupts */
#define DB_IRQ		0xB2
/* offset from mmio_vaddr for outbound communications */
#define DB_OUT		0xD4
/* DB_OUT reset bit */
#define DB_RESET	26

/*
 * Channel control block. Used to manage hardware queues.
 * The format must match hw's version.  The hw ccb is 128 bytes,
 * but the context area shouldn't be touched by the driver.
 *
 * Each pointer is wrapped in a union with a u64 so that every field
 * occupies 8 bytes regardless of the pointer size.
 */
#define ILOSW_CCB_SZ	64
#define ILOHW_CCB_SZ 	128
struct ccb {
	union {
		char *send_fifobar;
		u64 padding1;
	} ccb_u1;
	union {
		char *send_desc;
		u64 padding2;
	} ccb_u2;
	u64 send_ctrl;

	union {
		char *recv_fifobar;
		u64 padding3;
	} ccb_u3;
	union {
		char *recv_desc;
		u64 padding4;
	} ccb_u4;
	u64 recv_ctrl;

	union {
		char __iomem *db_base;
		u64 padding5;
	} ccb_u5;

	u64 channel;

	/* unused context area (64 bytes) */
};

/* ccb queue parameters */
#define SENDQ		1
#define RECVQ 		2
#define NR_QENTRY    	4
/* log2 of the size in bytes of one queue entry's descriptor buffer */
#define L2_QENTRY_SZ 	12

/* ccb ctrl bitfields */
#define CTRL_BITPOS_L2SZ             0
#define CTRL_BITPOS_FIFOINDEXMASK    4
#define CTRL_BITPOS_DESCLIMIT        18
#define CTRL_BITPOS_A                30
#define CTRL_BITPOS_G                31

/* ccb doorbell macros */
#define L2_DB_SIZE		14
#define ONE_DB_SIZE		(1 << L2_DB_SIZE)

/*
 * Per fd structure used to track the ccb allocated to that dev file.
 */
struct ccb_data {
	/* software version of ccb, using virtual addrs */
	struct ccb  driver_ccb;

	/* hardware version of ccb, using physical addrs */
	struct ccb  ilo_ccb;

	/* hardware ccb is written to this shared mapped device memory */
	struct ccb __iomem *mapped_ccb;

	/* dma'able memory used for send/recv queues */
	void       *dma_va;
	dma_addr_t  dma_pa;
	size_t      dma_size;

	/* pointer to hardware device info */
	struct ilo_hwinfo *ilo_hw;

	/* queue for this ccb to wait for recv data */
	wait_queue_head_t ccb_waitq;

	/* usage count, to allow for shared ccb's */
	int	    ccb_cnt;

	/* open wanted exclusive access to this ccb */
	int	    ccb_excl;
};

/*
 * FIFO queue structure, shared with hw.
 */
#define ILO_START_ALIGN	4096
#define ILO_CACHE_SZ 	128
struct fifo {
	u64 nrents;	/* user requested number of fifo entries */
	u64 imask;  /* mask to extract valid fifo index */
	u64 merge;	/*  O/C bits to merge in during enqueue operation */
	u64 reset;	/* set to non-zero when the target device resets */
	u8  pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)];

	u64 head;
	u8  pad_1[ILO_CACHE_SZ - (sizeof(u64))];

	u64 tail;
	u8  pad_2[ILO_CACHE_SZ - (sizeof(u64))];

	/* first queue entry; FIFOHANDLESIZE depends on this being [1] */
	u64 fifobar[1];
};

/* convert between struct fifo, and the fifobar, which is saved in the ccb */
#define FIFOHANDLESIZE (sizeof(struct fifo) - sizeof(u64))
#define FIFOBARTOHANDLE(_fifo) \
	((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE))

/* the number of qwords to consume from the entry descriptor */
#define ENTRY_BITPOS_QWORDS      0
/* descriptor index number (within a specified queue) */
#define ENTRY_BITPOS_DESCRIPTOR  10
/* state bit, fifo entry consumed by consumer */
#define ENTRY_BITPOS_C           22
/* state bit, fifo entry is occupied */
#define ENTRY_BITPOS_O           23

#define ENTRY_BITS_QWORDS        10
#define ENTRY_BITS_DESCRIPTOR    12
#define ENTRY_BITS_C             1
#define ENTRY_BITS_O             1
#define ENTRY_BITS_TOTAL	\
	(ENTRY_BITS_C + ENTRY_BITS_O + \
	 ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR)

/* extract various entry fields */
#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1)
#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C)
#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O)
#define ENTRY_MASK_QWORDS \
	(((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS)
#define ENTRY_MASK_DESCRIPTOR \
	(((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR)

/* entry with the two state bits (C and O) masked off */
#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O))

#endif /* __HPILO_H */
diff --git a/drivers/misc/ibmasm/Makefile b/drivers/misc/ibmasm/Makefile new file mode 100644 index 00000000000..9e63ade5ffd --- /dev/null +++ b/drivers/misc/ibmasm/Makefile @@ -0,0 +1,15 @@

obj-$(CONFIG_IBM_ASM) := ibmasm.o

ibmasm-y :=	module.o	\
		ibmasmfs.o	\
		event.o		\
		command.o	\
		remote.o	\
		heartbeat.o	\
		r_heartbeat.o	\
		dot_command.o	\
		lowlevel.o

ibmasm-$(CONFIG_SERIAL_8250) += uart.o

diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c new file mode 100644 index 00000000000..e2031739aa2 --- /dev/null +++ b/drivers/misc/ibmasm/command.c @@ -0,0 +1,186 @@

/*
 * IBM ASM Service Processor Device Driver
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2004
 *
 * Author: Max Asböck <amax@us.ibm.com>
 *
 */

#include <linux/sched.h>
#include "ibmasm.h"
#include "lowlevel.h"

static void exec_next_command(struct service_processor *sp);

/* number of commands currently allocated; only reported via dbg() */
static atomic_t command_count = ATOMIC_INIT(0);

/*
 * Allocate a command with a zeroed buffer of buffer_size bytes, in state
 * IBMASM_CMD_PENDING, with its kref initialised.
 *
 * Returns NULL if buffer_size exceeds IBMASM_CMD_MAX_BUFFER_SIZE or an
 * allocation fails.
 */
struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size)
{
	struct command *cmd;

	if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE)
		return NULL;

	cmd = kzalloc(sizeof(struct command), GFP_KERNEL);
	if (cmd == NULL)
		return NULL;


	cmd->buffer = kzalloc(buffer_size, GFP_KERNEL);
	if (cmd->buffer == NULL) {
		kfree(cmd);
		return NULL;
	}
	cmd->buffer_size = buffer_size;

	kref_init(&cmd->kref);
	cmd->lock = &sp->lock;

	cmd->status = IBMASM_CMD_PENDING;
	init_waitqueue_head(&cmd->wait);
	INIT_LIST_HEAD(&cmd->queue_node);

	atomic_inc(&command_count);
	dbg("command count: %d\n", atomic_read(&command_count));

	return cmd;
}

/*
 * kref release callback: unlink the command from any queue and free it
 * together with its buffer.
 */
void ibmasm_free_command(struct kref *kref)
{
	struct command *cmd = to_command(kref);

	list_del(&cmd->queue_node);
	atomic_dec(&command_count);
	dbg("command count: %d\n", atomic_read(&command_count));
	kfree(cmd->buffer);
	kfree(cmd);
}

/* Append cmd to the pending queue; the caller in this file holds sp->lock. */
static void enqueue_command(struct service_processor *sp, struct command *cmd)
{
	list_add_tail(&cmd->queue_node, &sp->command_queue);
}

/*
 * Remove and return the first queued command, or NULL if the queue is
 * empty; the caller in this file holds sp->lock.
 */
static struct command *dequeue_command(struct service_processor *sp)
{
	struct command *cmd;
	struct list_head *next;

	if (list_empty(&sp->command_queue))
		return NULL;

	next = sp->command_queue.next;
	list_del_init(next);
	cmd = list_entry(next, struct command, queue_node);

	return cmd;
}

/*
 * Send sp->current_command.  If sending fails, the command is marked
 * IBMASM_CMD_FAILED, its waiter is woken, the reference taken for
 * execution is dropped and the next queued command is started.
 */
static inline void do_exec_command(struct service_processor *sp)
{
	char tsbuf[32];

	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));

	if (ibmasm_send_i2o_message(sp)) {
		sp->current_command->status = IBMASM_CMD_FAILED;
		wake_up(&sp->current_command->wait);
		command_put(sp->current_command);
		exec_next_command(sp);
	}
}

/**
 * ibmasm_exec_command
 * send a command to a service processor
 * Commands are executed sequentially. One command (sp->current_command)
 * is sent to the service processor. Once the interrupt handler gets a
 * message of type command_response, the message is copied into
 * the current command's buffer (see ibmasm_receive_command_response).
 * If a command is already in flight, cmd is appended to the queue instead.
 */
void ibmasm_exec_command(struct service_processor *sp, struct command *cmd)
{
	unsigned long flags;
	char tsbuf[32];

	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));

	spin_lock_irqsave(&sp->lock, flags);

	if (!sp->current_command) {
		sp->current_command = cmd;
		/* reference held while the command is in flight */
		command_get(sp->current_command);
		spin_unlock_irqrestore(&sp->lock, flags);
		do_exec_command(sp);
	} else {
		enqueue_command(sp, cmd);
		spin_unlock_irqrestore(&sp->lock, flags);
	}
}

/*
 * Make the next queued command current and send it; if the queue is
 * empty, sp->current_command becomes NULL.
 */
static void exec_next_command(struct service_processor *sp)
{
	unsigned long flags;
	char tsbuf[32];

	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));

	spin_lock_irqsave(&sp->lock, flags);
	sp->current_command = dequeue_command(sp);
	if (sp->current_command) {
		command_get(sp->current_command);
		spin_unlock_irqrestore(&sp->lock, flags);
		do_exec_command(sp);
	} else {
		spin_unlock_irqrestore(&sp->lock, flags);
	}
}

/**
 * Sleep until a command has failed or a response has been received
 * and the command status been updated by the interrupt handler.
 * (see receive_response).
 * The timeout is given in seconds; the wait result is not reported to
 * the caller, who must inspect cmd->status.
 */
void ibmasm_wait_for_response(struct command *cmd, int timeout)
{
	wait_event_interruptible_timeout(cmd->wait,
				cmd->status == IBMASM_CMD_COMPLETE ||
				cmd->status == IBMASM_CMD_FAILED,
				timeout * HZ);
}

/**
 * receive_command_response
 * called by the interrupt handler when a dot command of type command_response
 * was received.
 * Copies at most cmd->buffer_size bytes of the response into the current
 * command, marks it complete, wakes its waiter, drops the in-flight
 * reference and starts the next queued command.  Does nothing if no
 * command is current.
 */
void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size)
{
	struct command *cmd = sp->current_command;

	if (!sp->current_command)
		return;

	memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size));
	cmd->status = IBMASM_CMD_COMPLETE;
	wake_up(&sp->current_command->wait);
	command_put(sp->current_command);
	exec_next_command(sp);
}
diff --git a/drivers/misc/ibmasm/dot_command.c b/drivers/misc/ibmasm/dot_command.c new file mode 100644 index 00000000000..3dd2dfb8da1 --- /dev/null +++ b/drivers/misc/ibmasm/dot_command.c @@ -0,0 +1,152 @@
/*
 * IBM ASM Service Processor Device Driver
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2004
 *
 * Author: Max Asböck <amax@us.ibm.com>
 *
 */

#include "ibmasm.h"
#include "dot_command.h"

/**
 * Dispatch an incoming message to the specific handler for the message.
 * Called from interrupt context.
+ *
+ * The size announced in the dot command header is clamped to message_size
+ * before the message is passed to the handler for its type. Empty messages
+ * are ignored; unknown message types are only logged.
+ */
+void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size)
+{
+	u32 size;
+	struct dot_command_header *header = (struct dot_command_header *)message;
+
+	/*
+	 * A negative message_size would be converted to a huge unsigned value
+	 * in the comparison against size below and defeat the clamp, so it is
+	 * treated like an empty message.
+	 */
+	if (message_size <= 0)
+		return;
+
+	size = get_dot_command_size(message);
+	if (size == 0)
+		return;
+
+	/* never hand a handler more bytes than were actually received */
+	if (size > message_size)
+		size = message_size;
+
+	switch (header->type) {
+	case sp_event:
+		ibmasm_receive_event(sp, message, size);
+		break;
+	case sp_command_response:
+		ibmasm_receive_command_response(sp, message, size);
+		break;
+	case sp_heartbeat:
+		ibmasm_receive_heartbeat(sp, message, size);
+		break;
+	default:
+		dev_err(sp->dev, "Received unknown message from service processor\n");
+	}
+}
+
+
+#define INIT_BUFFER_SIZE 32
+
+
+/**
+ * send the 4.3.5.10 dot command (driver VPD) to the service processor
+ *
+ * The command is built in an INIT_BUFFER_SIZE byte buffer that
+ * ibmasm_new_command() hands out zero-filled: the dot command header, the
+ * four command bytes 4.3.5.10 and a 16 byte data field that carries
+ * IBMASM_DRIVER_VPD. The command is executed and waited for with the
+ * normal timeout.
+ *
+ * Returns 0 if the command completed, -ENOMEM if no command could be
+ * allocated and -ENODEV if the command did not complete.
+ */
+int ibmasm_send_driver_vpd(struct service_processor *sp)
+{
+	struct command *command;
+	struct dot_command_header *header;
+	u8 *vpd_command;
+	u8 *vpd_data;
+	int result = 0;
+
+	command = ibmasm_new_command(sp, INIT_BUFFER_SIZE);
+	if (command == NULL)
+		return -ENOMEM;
+
+	header = (struct dot_command_header *)command->buffer;
+	header->type = sp_write;
+	header->command_size = 4;
+	header->data_size = 16;
+	header->status = 0;
+	header->reserved = 0;
+
+	vpd_command = command->buffer + sizeof(struct dot_command_header);
+	vpd_command[0] = 0x4;
+	vpd_command[1] = 0x3;
+	vpd_command[2] = 0x5;
+	vpd_command[3] = 0xa;
+
+	vpd_data = vpd_command + header->command_size;
+	/*
+	 * Copy at most data_size bytes of the VPD string. The buffer is
+	 * already zero-filled, so a shorter string stays NUL padded, and a
+	 * longer one can no longer run past the data field (and eventually
+	 * the end of the buffer) the way the former strcat() could.
+	 */
+	memcpy(vpd_data, IBMASM_DRIVER_VPD,
+	       min_t(size_t, strlen(IBMASM_DRIVER_VPD), header->data_size));
+	vpd_data[10] = 0;
+	vpd_data[15] = 0;
+
+	ibmasm_exec_command(sp, command);
+	ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL);
+
+	if (command->status != IBMASM_CMD_COMPLETE)
+		result = -ENODEV;
+
+	/* drop the reference obtained from ibmasm_new_command() */
+	command_put(command);
+
+	return result;
+}
+
+/* layout of the 4.3.6 (os state) dot command: header, 3 command bytes, 1 data byte */
+struct os_state_command {
+	struct dot_command_header header;
+	unsigned char command[3];
+	unsigned char data;
+};
+
+/**
+ * send the 4.3.6 dot command (os state) to the service
processor + * During driver init this function is called with os state "up". + * This causes the service processor to start sending heartbeats the + * driver. + * During driver exit the function is called with os state "down", + * causing the service processor to stop the heartbeats. + */ +int ibmasm_send_os_state(struct service_processor *sp, int os_state) +{ + struct command *cmd; + struct os_state_command *os_state_cmd; + int result = 0; + + cmd = ibmasm_new_command(sp, sizeof(struct os_state_command)); + if (cmd == NULL) + return -ENOMEM; + + os_state_cmd = (struct os_state_command *)cmd->buffer; + os_state_cmd->header.type = sp_write; + os_state_cmd->header.command_size = 3; + os_state_cmd->header.data_size = 1; + os_state_cmd->header.status = 0; + os_state_cmd->command[0] = 4; + os_state_cmd->command[1] = 3; + os_state_cmd->command[2] = 6; + os_state_cmd->data = os_state; + + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(cmd); + return result; +} diff --git a/drivers/misc/ibmasm/dot_command.h b/drivers/misc/ibmasm/dot_command.h new file mode 100644 index 00000000000..6cbba1afef3 --- /dev/null +++ b/drivers/misc/ibmasm/dot_command.h @@ -0,0 +1,78 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#ifndef __DOT_COMMAND_H__ +#define __DOT_COMMAND_H__ + +/* + * dot commands are the protocol used to communicate with the service + * processor. + * They consist of header, a command of variable length and data of + * variable length. + */ + +/* dot command types */ +#define sp_write 0 +#define sp_write_next 1 +#define sp_read 2 +#define sp_read_next 3 +#define sp_command_response 4 +#define sp_event 5 +#define sp_heartbeat 6 + +#pragma pack(1) +struct dot_command_header { + u8 type; + u8 command_size; + u16 data_size; + u8 status; + u8 reserved; +}; +#pragma pack() + +static inline size_t get_dot_command_size(void *buffer) +{ + struct dot_command_header *cmd = (struct dot_command_header *)buffer; + return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size; +} + +static inline unsigned int get_dot_command_timeout(void *buffer) +{ + struct dot_command_header *header = (struct dot_command_header *)buffer; + unsigned char *cmd = buffer + sizeof(struct dot_command_header); + + /* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */ + + if (header->command_size == 3) { + if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + } else if (header->command_size == 2) { + if ((cmd[0] == 7) && (cmd[1] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + if (cmd[0] == 8) + return IBMASM_CMD_TIMEOUT_EXTRA; + } + return IBMASM_CMD_TIMEOUT_NORMAL; +} + +#endif /* __DOT_COMMAND_H__ */ diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c new file mode 100644 index 00000000000..572d41ffc18 --- /dev/null +++ b/drivers/misc/ibmasm/event.c @@ -0,0 +1,176 @@ + +/* + * IBM ASM Service Processor Device Driver + 
* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/sched.h> +#include "ibmasm.h" +#include "lowlevel.h" + +/* + * ASM service processor event handling routines. + * + * Events are signalled to the device drivers through interrupts. + * They have the format of dot commands, with the type field set to + * sp_event. + * The driver does not interpret the events, it simply stores them in a + * circular buffer. + */ + +static void wake_up_event_readers(struct service_processor *sp) +{ + struct event_reader *reader; + + list_for_each_entry(reader, &sp->event_buffer->readers, node) + wake_up_interruptible(&reader->wait); +} + +/** + * receive_event + * Called by the interrupt handler when a dot command of type sp_event is + * received. + * Store the event in the circular event buffer, wake up any sleeping + * event readers. + * There is no reader marker in the buffer, therefore readers are + * responsible for keeping up with the writer, or they will lose events. 
+ */ +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned long flags; + + data_size = min(data_size, IBMASM_EVENT_MAX_SIZE); + + spin_lock_irqsave(&sp->lock, flags); + /* copy the event into the next slot in the circular buffer */ + event = &buffer->events[buffer->next_index]; + memcpy_fromio(event->data, data, data_size); + event->data_size = data_size; + event->serial_number = buffer->next_serial_number; + + /* advance indices in the buffer */ + buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS; + buffer->next_serial_number++; + spin_unlock_irqrestore(&sp->lock, flags); + + wake_up_event_readers(sp); +} + +static inline int event_available(struct event_buffer *b, struct event_reader *r) +{ + return (r->next_serial_number < b->next_serial_number); +} + +/** + * get_next_event + * Called by event readers (initiated from user space through the file + * system). + * Sleeps until a new event is available. 
+ *
+ * Returns the size of the event copied into reader->data, 0 if the wait
+ * was cancelled before an event became available, or -ERESTARTSYS if the
+ * sleep was interrupted.
+ */
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader)
+{
+	struct event_buffer *buffer = sp->event_buffer;
+	struct ibmasm_event *event;
+	unsigned int index;
+	unsigned long flags;
+
+	reader->cancelled = 0;
+
+	if (wait_event_interruptible(reader->wait,
+			event_available(buffer, reader) || reader->cancelled))
+		return -ERESTARTSYS;
+
+	/* woken up by ibmasm_cancel_next_event() without a new event */
+	if (!event_available(buffer, reader))
+		return 0;
+
+	spin_lock_irqsave(&sp->lock, flags);
+
+	/*
+	 * Start at the slot that will be overwritten next and walk forward
+	 * to the first event this reader has not seen yet.
+	 */
+	index = buffer->next_index;
+	event = &buffer->events[index];
+	while (event->serial_number < reader->next_serial_number) {
+		index = (index + 1) % IBMASM_NUM_EVENTS;
+		event = &buffer->events[index];
+	}
+	memcpy(reader->data, event->data, event->data_size);
+	reader->data_size = event->data_size;
+	reader->next_serial_number = event->serial_number + 1;
+
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	/*
+	 * Report the size that was copied under the lock. The slot itself
+	 * may be rewritten by ibmasm_receive_event() as soon as the lock is
+	 * dropped, so event->data_size must not be read again here.
+	 */
+	return reader->data_size;
+}
+
+/* Flag the reader as cancelled and wake it if it sleeps in ibmasm_get_next_event(). */
+void ibmasm_cancel_next_event(struct event_reader *reader)
+{
+	reader->cancelled = 1;
+	wake_up_interruptible(&reader->wait);
+}
+
+/*
+ * Add a reader to the list of event readers. The reader starts at the
+ * serial number of the next event to arrive, so it only sees events that
+ * are received after registration.
+ */
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader)
+{
+	unsigned long flags;
+
+	init_waitqueue_head(&reader->wait);
+	spin_lock_irqsave(&sp->lock, flags);
+	/* sampled under the lock that ibmasm_receive_event() advances it under */
+	reader->next_serial_number = sp->event_buffer->next_serial_number;
+	list_add(&reader->node, &sp->event_buffer->readers);
+	spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+/* Remove a reader from the list of event readers. */
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sp->lock, flags);
+	list_del(&reader->node);
+	spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+/*
+ * Allocate and initialize the circular event buffer of a service processor.
+ * Serial numbers handed out to events start at 1; empty slots carry serial
+ * number 0.
+ *
+ * Returns 0 on success and 1 if the buffer could not be allocated.
+ */
+int ibmasm_event_buffer_init(struct service_processor *sp)
+{
+	struct event_buffer *buffer;
+	struct ibmasm_event *event;
+	int i;
+
+	buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL);
+	if (!buffer)
+		return 1;
+
+	buffer->next_index = 0;
+	buffer->next_serial_number = 1;
+
+	event = buffer->events;
+	for (i=0; i<IBMASM_NUM_EVENTS; i++, event++)
+		event->serial_number = 0;
+
+	INIT_LIST_HEAD(&buffer->readers);
+
+	sp->event_buffer = buffer;
+
+	return 0;
+}
+
+/* Free the event buffer allocated by ibmasm_event_buffer_init(). */
+void ibmasm_event_buffer_exit(struct service_processor *sp)
+{
+	kfree(sp->event_buffer);
+}
diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c
new file mode 100644
index 00000000000..1bc4306572a
--- /dev/null
+++ b/drivers/misc/ibmasm/heartbeat.c
@@ -0,0 +1,101 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/notifier.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+#include "lowlevel.h"
+
+static int suspend_heartbeats = 0;
+
+/*
+ * Once the driver indicates to the service processor that it is running
+ * - see send_os_state() - the service processor sends periodic heartbeats
+ * to the driver. The driver must respond to the heartbeats or else the OS
+ * will be rebooted.
+ * In the case of a panic the interrupt handler continues to work and thus
+ * continues to respond to heartbeats, making the service processor believe
+ * the OS is still running and thus preventing a reboot.
+ * To prevent this from happening a callback is added to the panic_notifier_list.
+ * Before responding to a heartbeat the driver checks if a panic has happened, + * if yes it suspends heartbeat, causing the service processor to reboot as + * expected. + */ +static int panic_happened(struct notifier_block *n, unsigned long val, void *v) +{ + suspend_heartbeats = 1; + return 0; +} + +static struct notifier_block panic_notifier = { panic_happened, NULL, 1 }; + +void ibmasm_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier); +} + +void ibmasm_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &panic_notifier); +} + + +int ibmasm_heartbeat_init(struct service_processor *sp) +{ + sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE); + if (sp->heartbeat == NULL) + return -ENOMEM; + + return 0; +} + +void ibmasm_heartbeat_exit(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + suspend_heartbeats = 1; + command_put(sp->heartbeat); +} + +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size) +{ + struct command *cmd = sp->heartbeat; + struct dot_command_header *header = (struct dot_command_header *)cmd->buffer; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + if (suspend_heartbeats) + return; + + /* return the received dot command to sender */ + cmd->status = IBMASM_CMD_PENDING; + size = min(size, cmd->buffer_size); + memcpy_fromio(cmd->buffer, message, size); + header->type = sp_write; + ibmasm_exec_command(sp, cmd); +} diff --git a/drivers/misc/ibmasm/i2o.h b/drivers/misc/ibmasm/i2o.h new file mode 100644 index 00000000000..bf2c738d2b7 --- /dev/null +++ b/drivers/misc/ibmasm/i2o.h @@ -0,0 +1,77 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#pragma pack(1) +struct i2o_header { + u8 version; + u8 message_flags; + u16 message_size; + u8 target; + u8 initiator_and_target; + u8 initiator; + u8 function; + u32 initiator_context; +}; +#pragma pack() + +#define I2O_HEADER_TEMPLATE \ + { .version = 0x01, \ + .message_flags = 0x00, \ + .function = 0xFF, \ + .initiator = 0x00, \ + .initiator_and_target = 0x40, \ + .target = 0x00, \ + .initiator_context = 0x0 } + +#define I2O_MESSAGE_SIZE 0x1000 +#define I2O_COMMAND_SIZE (I2O_MESSAGE_SIZE - sizeof(struct i2o_header)) + +#pragma pack(1) +struct i2o_message { + struct i2o_header header; + void *data; +}; +#pragma pack() + +static inline unsigned short outgoing_message_size(unsigned int data_size) +{ + unsigned int size; + unsigned short i2o_size; + + if (data_size > I2O_COMMAND_SIZE) + data_size = I2O_COMMAND_SIZE; + + size = sizeof(struct i2o_header) + data_size; + + i2o_size = size / sizeof(u32); + + if (size % sizeof(u32)) + i2o_size++; + + return i2o_size; +} + +static inline u32 incoming_data_size(struct i2o_message *i2o_message) +{ + return (sizeof(u32) * i2o_message->header.message_size); +} diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h new 
file mode 100644 index 00000000000..4d8a4e248b3 --- /dev/null +++ b/drivers/misc/ibmasm/ibmasm.h @@ -0,0 +1,220 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/kref.h> +#include <linux/device.h> +#include <linux/input.h> + +/* Driver identification */ +#define DRIVER_NAME "ibmasm" +#define DRIVER_VERSION "1.0" +#define DRIVER_AUTHOR "Max Asbock <masbock@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>" +#define DRIVER_DESC "IBM ASM Service Processor Driver" + +#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME) +#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME) + +extern int ibmasm_debug; +#define dbg(STR, ARGS...) 
\ + do { \ + if (ibmasm_debug) \ + printk(KERN_DEBUG STR , ##ARGS); \ + } while (0) + +static inline char *get_timestamp(char *buf) +{ + struct timeval now; + do_gettimeofday(&now); + sprintf(buf, "%lu.%lu", now.tv_sec, now.tv_usec); + return buf; +} + +#define IBMASM_CMD_PENDING 0 +#define IBMASM_CMD_COMPLETE 1 +#define IBMASM_CMD_FAILED 2 + +#define IBMASM_CMD_TIMEOUT_NORMAL 45 +#define IBMASM_CMD_TIMEOUT_EXTRA 240 + +#define IBMASM_CMD_MAX_BUFFER_SIZE 0x8000 + +#define REVERSE_HEARTBEAT_TIMEOUT 120 + +#define HEARTBEAT_BUFFER_SIZE 0x400 + +#ifdef IA64 +#define IBMASM_DRIVER_VPD "Lin64 6.08 " +#else +#define IBMASM_DRIVER_VPD "Lin32 6.08 " +#endif + +#define SYSTEM_STATE_OS_UP 5 +#define SYSTEM_STATE_OS_DOWN 4 + +#define IBMASM_NAME_SIZE 16 + +#define IBMASM_NUM_EVENTS 10 +#define IBMASM_EVENT_MAX_SIZE 2048u + + +struct command { + struct list_head queue_node; + wait_queue_head_t wait; + unsigned char *buffer; + size_t buffer_size; + int status; + struct kref kref; + spinlock_t *lock; +}; +#define to_command(c) container_of(c, struct command, kref) + +void ibmasm_free_command(struct kref *kref); +static inline void command_put(struct command *cmd) +{ + unsigned long flags; + spinlock_t *lock = cmd->lock; + + spin_lock_irqsave(lock, flags); + kref_put(&cmd->kref, ibmasm_free_command); + spin_unlock_irqrestore(lock, flags); +} + +static inline void command_get(struct command *cmd) +{ + kref_get(&cmd->kref); +} + + +struct ibmasm_event { + unsigned int serial_number; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct event_buffer { + struct ibmasm_event events[IBMASM_NUM_EVENTS]; + unsigned int next_serial_number; + unsigned int next_index; + struct list_head readers; +}; + +struct event_reader { + int cancelled; + unsigned int next_serial_number; + wait_queue_head_t wait; + struct list_head node; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct reverse_heartbeat { + wait_queue_head_t wait; + 
unsigned int stopped; +}; + +struct ibmasm_remote { + struct input_dev *keybd_dev; + struct input_dev *mouse_dev; +}; + +struct service_processor { + struct list_head node; + spinlock_t lock; + void __iomem *base_address; + unsigned int irq; + struct command *current_command; + struct command *heartbeat; + struct list_head command_queue; + struct event_buffer *event_buffer; + char dirname[IBMASM_NAME_SIZE]; + char devname[IBMASM_NAME_SIZE]; + unsigned int number; + struct ibmasm_remote remote; + int serial_line; + struct device *dev; +}; + +/* command processing */ +struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size); +void ibmasm_exec_command(struct service_processor *sp, struct command *cmd); +void ibmasm_wait_for_response(struct command *cmd, int timeout); +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size); + +/* event processing */ +int ibmasm_event_buffer_init(struct service_processor *sp); +void ibmasm_event_buffer_exit(struct service_processor *sp); +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size); +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader); +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader); +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader); +void ibmasm_cancel_next_event(struct event_reader *reader); + +/* heartbeat - from SP to OS */ +void ibmasm_register_panic_notifier(void); +void ibmasm_unregister_panic_notifier(void); +int ibmasm_heartbeat_init(struct service_processor *sp); +void ibmasm_heartbeat_exit(struct service_processor *sp); +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size); + +/* reverse heartbeat - from OS to SP */ +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +int ibmasm_start_reverse_heartbeat(struct 
service_processor *sp, struct reverse_heartbeat *rhb); +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb); + +/* dot commands */ +void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size); +int ibmasm_send_driver_vpd(struct service_processor *sp); +int ibmasm_send_os_state(struct service_processor *sp, int os_state); + +/* low level message processing */ +int ibmasm_send_i2o_message(struct service_processor *sp); +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id); + +/* remote console */ +void ibmasm_handle_mouse_interrupt(struct service_processor *sp); +int ibmasm_init_remote_input_dev(struct service_processor *sp); +void ibmasm_free_remote_input_dev(struct service_processor *sp); + +/* file system */ +int ibmasmfs_register(void); +void ibmasmfs_unregister(void); +void ibmasmfs_add_sp(struct service_processor *sp); + +/* uart */ +#ifdef CONFIG_SERIAL_8250 +void ibmasm_register_uart(struct service_processor *sp); +void ibmasm_unregister_uart(struct service_processor *sp); +#else +#define ibmasm_register_uart(sp) do { } while(0) +#define ibmasm_unregister_uart(sp) do { } while(0) +#endif diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c new file mode 100644 index 00000000000..aecf40ecb3a --- /dev/null +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -0,0 +1,635 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+/*
+ * Parts of this code are based on an article by Jonathan Corbet
+ * that appeared in Linux Weekly News.
+ */
+
+
+/*
+ * The IBMASM file virtual filesystem. It creates the following hierarchy
+ * dynamically when mounted from user space:
+ *
+ *    /ibmasm
+ *    |-- 0
+ *    |   |-- command
+ *    |   |-- event
+ *    |   |-- reverse_heartbeat
+ *    |   `-- remote_video
+ *    |       |-- depth
+ *    |       |-- height
+ *    |       `-- width
+ *    .
+ *    .
+ *    .
+ *    `-- n
+ *        |-- command
+ *        |-- event
+ *        |-- reverse_heartbeat
+ *        `-- remote_video
+ *            |-- depth
+ *            |-- height
+ *            `-- width
+ *
+ * For each service processor the following files are created:
+ *
+ * command: execute dot commands
+ *	write: execute a dot command on the service processor
+ *	read: return the result of a previously executed dot command
+ *
+ * event: listen for service processor events
+ *	read: sleep (interruptible) until an event occurs
+ *	write: wakeup sleeping event listener
+ *
+ * reverse_heartbeat: send a heartbeat to the service processor
+ *	read: sleep (interruptible) until the reverse heartbeat fails
+ *	write: wakeup sleeping heartbeat listener
+ *
+ * remote_video/width
+ * remote_video/height
+ * remote_video/depth: control remote display settings
+ *	write: set value
+ *	read: read value
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include "ibmasm.h"
+#include "remote.h"
+#include "dot_command.h"
+
+#define IBMASMFS_MAGIC 0x66726f67
+
+static LIST_HEAD(service_processors);
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode);
+static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root);
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent);
+
+
+/* Mount callback: a single superblock, filled in by ibmasmfs_fill_super(). */
+static int ibmasmfs_get_super(struct file_system_type *fst,
+			int flags, const char *name, void *data,
+			struct vfsmount *mnt)
+{
+	return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt);
+}
+
+static const struct super_operations ibmasmfs_s_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+};
+
+static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations;
+
+static struct file_system_type ibmasmfs_type = {
+	.owner = THIS_MODULE,
+	.name = "ibmasmfs",
+	.get_sb = ibmasmfs_get_super,
+	.kill_sb = kill_litter_super,
+};
+
+/*
+ * Set up the superblock, create the root directory (mode 0500) and populate
+ * it through ibmasmfs_create_files().
+ * Returns 0 on success or -ENOMEM if the root inode or dentry cannot be
+ * allocated.
+ */
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent)
+{
+	struct inode *root;
+	struct dentry *root_dentry;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = IBMASMFS_MAGIC;
+	sb->s_op = &ibmasmfs_s_ops;
+	sb->s_time_gran = 1;
+
+	root = ibmasmfs_make_inode (sb, S_IFDIR | 0500);
+	if (!root)
+		return -ENOMEM;
+
+	root->i_op = &simple_dir_inode_operations;
+	root->i_fop = ibmasmfs_dir_ops;
+
+	root_dentry = d_alloc_root(root);
+	if (!root_dentry) {
+		iput(root);
+		return -ENOMEM;
+	}
+	sb->s_root = root_dentry;
+
+	ibmasmfs_create_files(sb, root_dentry);
+	return 0;
+}
+
+/*
+ * Allocate an inode on sb with the given mode and all three timestamps set
+ * to the current time. Returns NULL if no inode could be allocated.
+ */
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
+{
+	struct inode *ret = new_inode(sb);
+
+	if (ret) {
+		ret->i_mode = mode;
+		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
+	}
+	return ret;
+}
+
+/*
+ * Create a regular file called name below parent. fops become the file
+ * operations of the inode and data is stored in its i_private.
+ * Returns the new dentry, or NULL if the dentry or inode allocation failed.
+ */
+static struct dentry *ibmasmfs_create_file (struct super_block *sb,
+			struct dentry *parent,
+			const char *name,
+			const struct file_operations *fops,
+			void *data,
+			int mode)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		return NULL;
+
+	inode = ibmasmfs_make_inode(sb, S_IFREG | mode);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	inode->i_fop = fops;
+	inode->i_private = data;
+
+	d_add(dentry, inode);
+	return dentry;
+}
+
+/*
+ * Create a directory (mode 0500) called name below parent.
+ * Returns the new dentry, or NULL if the dentry or inode allocation failed.
+ */
+static struct dentry *ibmasmfs_create_dir (struct super_block *sb,
+				struct dentry *parent,
+				const char *name)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		return NULL;
+
+	inode = ibmasmfs_make_inode(sb, S_IFDIR | 0500);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = ibmasmfs_dir_ops;
+
+	d_add(dentry, inode);
+	return dentry;
+}
+
+int ibmasmfs_register(void)
+{
+	return register_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_unregister(void)
+{
+	unregister_filesystem(&ibmasmfs_type);
+}
+
+/* Add a service processor to the list kept by this file. */
+void ibmasmfs_add_sp(struct service_processor *sp)
+{
+	list_add(&sp->node, &service_processors);
+}
+
+/* struct to save state between command file operations */
+struct ibmasmfs_command_data {
+	struct service_processor *sp;
+	struct command *command;
+};
+
+/* struct to save state between event file operations */
+struct ibmasmfs_event_data {
+	struct service_processor *sp;
+	struct event_reader reader;
+	int active;
+};
+
+/* struct to save state between reverse heartbeat file operations */
+struct ibmasmfs_heartbeat_data {
+	struct service_processor *sp;
+	struct reverse_heartbeat heartbeat;
+	int active;
+};
+
+/*
+ * Allocate the per-open state of a command file.
+ * Returns -ENODEV if the inode carries no service processor and -ENOMEM if
+ * the state cannot be allocated.
+ */
+static int command_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_command_data *command_data;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL);
+	if (!command_data)
+		return -ENOMEM;
+
+	command_data->command = NULL;
+	command_data->sp = inode->i_private;
+	file->private_data = command_data;
+	return 0;
+}
+
+/* Drop the reference on a command whose result was never read, free the state. */
+static int command_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+
+	if (command_data->command)
+		command_put(command_data->command);
+
+	kfree(command_data);
+	return 0;
+}
+
+/*
+ * Return the response of the command written earlier on this file.
+ *
+ * The stored command is taken over under sp->lock and released before
+ * returning, so a response can be read only once.
+ * Returns the number of bytes copied, 0 if there is nothing to read (or for
+ * a zero, oversized or non-zero-offset request), -EIO if the command did not
+ * complete and -EFAULT if the copy to user space failed.
+ */
+static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+	struct command *cmd;
+	int len;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	spin_lock_irqsave(&command_data->sp->lock, flags);
+	cmd = command_data->command;
+	if (cmd == NULL) {
+		spin_unlock_irqrestore(&command_data->sp->lock, flags);
+		return 0;
+	}
+	command_data->command = NULL;
+	spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+	if (cmd->status != IBMASM_CMD_COMPLETE) {
+		command_put(cmd);
+		return -EIO;
+	}
+	len = min(count, cmd->buffer_size);
+	if (copy_to_user(buf, cmd->buffer, len)) {
+		command_put(cmd);
+		return -EFAULT;
+	}
+	command_put(cmd);
+
+	return len;
+}
+
+/*
+ * Execute the dot command written by user space and wait for its response.
+ *
+ * The command stays attached to the file until command_file_read() or
+ * command_file_close() releases it; while one is attached further writes
+ * fail with -EAGAIN.
+ * Returns count once the command was submitted, 0 for a zero, oversized or
+ * non-zero-offset request, -ENOMEM if no command could be allocated,
+ * -EFAULT if the copy from user space failed and -EINVAL if the written
+ * data is too short to hold the dot command it announces.
+ */
+static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+	struct dot_command_header *header;
+	struct command *cmd;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	/* commands are executed sequentially, only one command at a time */
+	if (command_data->command)
+		return -EAGAIN;
+
+	cmd = ibmasm_new_command(command_data->sp, count);
+	if (!cmd)
+		return -ENOMEM;
+
+	if (copy_from_user(cmd->buffer, ubuff, count)) {
+		command_put(cmd);
+		return -EFAULT;
+	}
+
+	/*
+	 * get_dot_command_timeout() below reads the header and the command
+	 * bytes out of cmd->buffer, which is only count bytes long. Reject
+	 * writes that cannot contain them instead of reading past the
+	 * allocation.
+	 * NOTE(review): header->data_size is not checked against count here;
+	 * confirm that the code sending the buffer bounds its copy.
+	 */
+	header = (struct dot_command_header *)cmd->buffer;
+	if (count < sizeof(*header) ||
+	    count - sizeof(*header) < header->command_size) {
+		command_put(cmd);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&command_data->sp->lock, flags);
+	if (command_data->command) {
+		spin_unlock_irqrestore(&command_data->sp->lock, flags);
+		command_put(cmd);
+		return -EAGAIN;
+	}
+	command_data->command = cmd;
+	spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+	ibmasm_exec_command(command_data->sp, cmd);
+	ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer));
+
+	return count;
+}
+
+/*
+ * Allocate the per-open state of an event file and register its reader with
+ * the service processor.
+ * Returns -ENODEV if the inode carries no service processor and -ENOMEM if
+ * the state cannot be allocated.
+ */
+static int event_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_event_data *event_data;
+	struct service_processor *sp;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	sp = inode->i_private;
+
+	event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL);
+	if (!event_data)
+		return -ENOMEM;
+
+	ibmasm_event_reader_register(sp, &event_data->reader);
+
+	event_data->sp = sp;
+	event_data->active = 0;
+	file->private_data = event_data;
+	return 0;
+}
+
+/* Unregister the event reader and free the per-open state. */
+static int event_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+
+	ibmasm_event_reader_unregister(event_data->sp, &event_data->reader);
+	kfree(event_data);
+	return 0;
+}
+
+/*
+ * Sleep until the next event arrives and copy it to user space.
+ *
+ * Only one read may be in progress per open file; a second one fails with
+ * -EBUSY.
+ * Returns the size of the event, the result of ibmasm_get_next_event() if
+ * that is zero or negative, -EINVAL if the user buffer is smaller than the
+ * event and -EFAULT if the copy to user space failed.
+ */
+static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+	struct event_reader *reader = &event_data->reader;
+	struct service_processor *sp = event_data->sp;
+	int ret;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_EVENT_MAX_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	spin_lock_irqsave(&sp->lock, flags);
+	if (event_data->active) {
+		spin_unlock_irqrestore(&sp->lock, flags);
+		return -EBUSY;
+	}
+	event_data->active = 1;
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	ret = ibmasm_get_next_event(sp, reader);
+	if (ret <= 0)
+		goto out;
+
+	if (count < reader->data_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (copy_to_user(buf, reader->data, reader->data_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	ret = reader->data_size;
+
+out:
+	event_data->active = 0;
+	return ret;
+}
+
+/*
+ * A one byte write at offset 0 cancels a reader sleeping in
+ * event_file_read(); any other write is ignored. Always returns 0 except
+ * for a negative offset (-EINVAL).
+ */
+static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count != 1)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	ibmasm_cancel_next_event(&event_data->reader);
+	return 0;
+}
+
+static int r_heartbeat_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat; + + if (!inode->i_private) + return -ENODEV; + + rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL); + if (!rhbeat) + return -ENOMEM; + + rhbeat->sp = inode->i_private; + rhbeat->active = 0; + ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + file->private_data = rhbeat; + return 0; +} + +static int r_heartbeat_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + kfree(rhbeat); + return 0; +} + +static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + unsigned long flags; + int result; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + /* allow only one reverse heartbeat per process */ + spin_lock_irqsave(&rhbeat->sp->lock, flags); + if (rhbeat->active) { + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + return -EBUSY; + } + rhbeat->active = 1; + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + + result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + rhbeat->active = 0; + + return result; +} + +static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + if (rhbeat->active) + ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat); + + return 1; +} + +static int remote_settings_file_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static int remote_settings_file_close(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t 
remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + unsigned char *page; + int retval; + int len = 0; + unsigned int value; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + page = (unsigned char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + value = readl(address); + len = sprintf(page, "%d\n", value); + + if (copy_to_user(buf, page, len)) { + retval = -EFAULT; + goto exit; + } + *offset += len; + retval = len; + +exit: + free_page((unsigned long)page); + return retval; +} + +static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + char *buff; + unsigned int value; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + buff = kzalloc (count + 1, GFP_KERNEL); + if (!buff) + return -ENOMEM; + + + if (copy_from_user(buff, ubuff, count)) { + kfree(buff); + return -EFAULT; + } + + value = simple_strtoul(buff, NULL, 10); + writel(value, address); + kfree(buff); + + return count; +} + +static const struct file_operations command_fops = { + .open = command_file_open, + .release = command_file_close, + .read = command_file_read, + .write = command_file_write, +}; + +static const struct file_operations event_fops = { + .open = event_file_open, + .release = event_file_close, + .read = event_file_read, + .write = event_file_write, +}; + +static const struct file_operations r_heartbeat_fops = { + .open = r_heartbeat_file_open, + .release = r_heartbeat_file_close, + .read = r_heartbeat_file_read, + .write = r_heartbeat_file_write, +}; + +static const struct file_operations remote_settings_fops = { + .open = remote_settings_file_open, + .release = remote_settings_file_close, + .read = 
remote_settings_file_read, + .write = remote_settings_file_write, +}; + + +static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root) +{ + struct list_head *entry; + struct service_processor *sp; + + list_for_each(entry, &service_processors) { + struct dentry *dir; + struct dentry *remote_dir; + sp = list_entry(entry, struct service_processor, node); + dir = ibmasmfs_create_dir(sb, root, sp->dirname); + if (!dir) + continue; + + ibmasmfs_create_file(sb, dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(sb, dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(sb, dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR); + + remote_dir = ibmasmfs_create_dir(sb, dir, "remote_video"); + if (!remote_dir) + continue; + + ibmasmfs_create_file(sb, remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(sb, remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(sb, remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR); + } +} diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c new file mode 100644 index 00000000000..4b2398e27fd --- /dev/null +++ b/drivers/misc/ibmasm/lowlevel.c @@ -0,0 +1,85 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include "ibmasm.h" +#include "lowlevel.h" +#include "i2o.h" +#include "dot_command.h" +#include "remote.h" + +static struct i2o_header header = I2O_HEADER_TEMPLATE; + + +int ibmasm_send_i2o_message(struct service_processor *sp) +{ + u32 mfa; + unsigned int command_size; + struct i2o_message *message; + struct command *command = sp->current_command; + + mfa = get_mfa_inbound(sp->base_address); + if (!mfa) + return 1; + + command_size = get_dot_command_size(command->buffer); + header.message_size = outgoing_message_size(command_size); + + message = get_i2o_message(sp->base_address, mfa); + + memcpy_toio(&message->header, &header, sizeof(struct i2o_header)); + memcpy_toio(&message->data, command->buffer, command_size); + + set_mfa_inbound(sp->base_address, mfa); + + return 0; +} + +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id) +{ + u32 mfa; + struct service_processor *sp = (struct service_processor *)dev_id; + void __iomem *base_address = sp->base_address; + char tsbuf[32]; + + if (!sp_interrupt_pending(base_address)) + return IRQ_NONE; + + dbg("respond to interrupt at %s\n", get_timestamp(tsbuf)); + + if (mouse_interrupt_pending(sp)) { + ibmasm_handle_mouse_interrupt(sp); + clear_mouse_interrupt(sp); + } + + mfa = get_mfa_outbound(base_address); + if (valid_mfa(mfa)) { + struct i2o_message *msg = get_i2o_message(base_address, mfa); + ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg)); + } else + dbg("didn't get a valid MFA\n"); + + set_mfa_outbound(base_address, mfa); + dbg("finished interrupt at %s\n", get_timestamp(tsbuf)); + + return IRQ_HANDLED; +} diff --git a/drivers/misc/ibmasm/lowlevel.h 
b/drivers/misc/ibmasm/lowlevel.h new file mode 100644 index 00000000000..766766523a6 --- /dev/null +++ b/drivers/misc/ibmasm/lowlevel.h @@ -0,0 +1,137 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +/* Condor service processor specific hardware definitions */ + +#ifndef __IBMASM_CONDOR_H__ +#define __IBMASM_CONDOR_H__ + +#include <asm/io.h> + +#define VENDORID_IBM 0x1014 +#define DEVICEID_RSA 0x010F + +#define GET_MFA_ADDR(x) (x & 0xFFFFFF00) + +#define MAILBOX_FULL(x) (x & 0x00000001) + +#define NO_MFAS_AVAILABLE 0xFFFFFFFF + + +#define INBOUND_QUEUE_PORT 0x40 /* contains address of next free MFA */ +#define OUTBOUND_QUEUE_PORT 0x44 /* contains address of posted MFA */ + +#define SP_INTR_MASK 0x00000008 +#define UART_INTR_MASK 0x00000010 + +#define INTR_STATUS_REGISTER 0x13A0 +#define INTR_CONTROL_REGISTER 0x13A4 + +#define SCOUT_COM_A_BASE 0x0000 +#define SCOUT_COM_B_BASE 0x0100 +#define SCOUT_COM_C_BASE 0x0200 +#define SCOUT_COM_D_BASE 0x0300 + +static inline int sp_interrupt_pending(void __iomem *base_address) +{ + return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline int uart_interrupt_pending(void __iomem 
*base_address) +{ + return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) & ~mask, ctrl_reg); +} + +static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) | mask, ctrl_reg); +} + +static inline void enable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void disable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void enable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, UART_INTR_MASK); +} + +static inline void disable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, UART_INTR_MASK); +} + +#define valid_mfa(mfa) ( (mfa) != NO_MFAS_AVAILABLE ) + +static inline u32 get_mfa_outbound(void __iomem *base_address) +{ + int retry; + u32 mfa; + + for (retry=0; retry<=10; retry++) { + mfa = readl(base_address + OUTBOUND_QUEUE_PORT); + if (valid_mfa(mfa)) + break; + } + return mfa; +} + +static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + OUTBOUND_QUEUE_PORT); +} + +static inline u32 get_mfa_inbound(void __iomem *base_address) +{ + u32 mfa = readl(base_address + INBOUND_QUEUE_PORT); + + if (MAILBOX_FULL(mfa)) + return 0; + + return mfa; +} + +static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + INBOUND_QUEUE_PORT); +} + +static inline struct i2o_message *get_i2o_message(void __iomem *base_address, u32 mfa) +{ + return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address); +} + +#endif /* __IBMASM_CONDOR_H__ */ diff --git 
a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c new file mode 100644 index 00000000000..dc14b0b9cbf --- /dev/null +++ b/drivers/misc/ibmasm/module.c @@ -0,0 +1,237 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + * This driver is based on code originally written by Pete Reynolds + * and others. + * + */ + +/* + * The ASM device driver does the following things: + * + * 1) When loaded it sends a message to the service processor, + * indicating that an OS is * running. This causes the service processor + * to send periodic heartbeats to the OS. + * + * 2) Answers the periodic heartbeats sent by the service processor. + * Failure to do so would result in system reboot. + * + * 3) Acts as a pass through for dot commands sent from user applications. + * The interface for this is the ibmasmfs file system. + * + * 4) Allows user applications to register for event notification. Events + * are sent to the driver through interrupts. They can be read from user + * space through the ibmasmfs file system. + * + * 5) Allows user space applications to send heartbeats to the service + * processor (aka reverse heartbeats). 
Again this happens through ibmasmfs. + * + * 6) Handles remote mouse and keyboard event interrupts and makes them + * available to user applications through ibmasmfs. + * + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include "ibmasm.h" +#include "lowlevel.h" +#include "remote.h" + +int ibmasm_debug = 0; +module_param(ibmasm_debug, int , S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(ibmasm_debug, " Set debug mode on or off"); + + +static int __devinit ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int result; + struct service_processor *sp; + + if ((result = pci_enable_device(pdev))) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + return result; + } + if ((result = pci_request_regions(pdev, DRIVER_NAME))) { + dev_err(&pdev->dev, "Failed to allocate PCI resources\n"); + goto error_resources; + } + /* vnc client won't work without bus-mastering */ + pci_set_master(pdev); + + sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL); + if (sp == NULL) { + dev_err(&pdev->dev, "Failed to allocate memory\n"); + result = -ENOMEM; + goto error_kmalloc; + } + + spin_lock_init(&sp->lock); + INIT_LIST_HEAD(&sp->command_queue); + + pci_set_drvdata(pdev, (void *)sp); + sp->dev = &pdev->dev; + sp->number = pdev->bus->number; + snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number); + snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number); + + if (ibmasm_event_buffer_init(sp)) { + dev_err(sp->dev, "Failed to allocate event buffer\n"); + goto error_eventbuffer; + } + + if (ibmasm_heartbeat_init(sp)) { + dev_err(sp->dev, "Failed to allocate heartbeat command\n"); + goto error_heartbeat; + } + + sp->irq = pdev->irq; + sp->base_address = pci_ioremap_bar(pdev, 0); + if (!sp->base_address) { + dev_err(sp->dev, "Failed to ioremap pci memory\n"); + result = -ENODEV; + goto error_ioremap; + } + + result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp); + if (result) { + dev_err(sp->dev, 
"Failed to register interrupt handler\n"); + goto error_request_irq; + } + + enable_sp_interrupts(sp->base_address); + + result = ibmasm_init_remote_input_dev(sp); + if (result) { + dev_err(sp->dev, "Failed to initialize remote queue\n"); + goto error_send_message; + } + + result = ibmasm_send_driver_vpd(sp); + if (result) { + dev_err(sp->dev, "Failed to send driver VPD to service processor\n"); + goto error_send_message; + } + result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP); + if (result) { + dev_err(sp->dev, "Failed to send OS state to service processor\n"); + goto error_send_message; + } + ibmasmfs_add_sp(sp); + + ibmasm_register_uart(sp); + + return 0; + +error_send_message: + disable_sp_interrupts(sp->base_address); + ibmasm_free_remote_input_dev(sp); + free_irq(sp->irq, (void *)sp); +error_request_irq: + iounmap(sp->base_address); +error_ioremap: + ibmasm_heartbeat_exit(sp); +error_heartbeat: + ibmasm_event_buffer_exit(sp); +error_eventbuffer: + pci_set_drvdata(pdev, NULL); + kfree(sp); +error_kmalloc: + pci_release_regions(pdev); +error_resources: + pci_disable_device(pdev); + + return result; +} + +static void __devexit ibmasm_remove_one(struct pci_dev *pdev) +{ + struct service_processor *sp = (struct service_processor *)pci_get_drvdata(pdev); + + dbg("Unregistering UART\n"); + ibmasm_unregister_uart(sp); + dbg("Sending OS down message\n"); + if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN)) + err("failed to get repsonse to 'Send OS State' command\n"); + dbg("Disabling heartbeats\n"); + ibmasm_heartbeat_exit(sp); + dbg("Disabling interrupts\n"); + disable_sp_interrupts(sp->base_address); + dbg("Freeing SP irq\n"); + free_irq(sp->irq, (void *)sp); + dbg("Cleaning up\n"); + ibmasm_free_remote_input_dev(sp); + iounmap(sp->base_address); + ibmasm_event_buffer_exit(sp); + pci_set_drvdata(pdev, NULL); + kfree(sp); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_device_id ibmasm_pci_table[] = +{ + { 
PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) }, + {}, +}; + +static struct pci_driver ibmasm_driver = { + .name = DRIVER_NAME, + .id_table = ibmasm_pci_table, + .probe = ibmasm_init_one, + .remove = __devexit_p(ibmasm_remove_one), +}; + +static void __exit ibmasm_exit (void) +{ + ibmasm_unregister_panic_notifier(); + ibmasmfs_unregister(); + pci_unregister_driver(&ibmasm_driver); + info(DRIVER_DESC " version " DRIVER_VERSION " unloaded"); +} + +static int __init ibmasm_init(void) +{ + int result; + + result = ibmasmfs_register(); + if (result) { + err("Failed to register ibmasmfs file system"); + return result; + } + result = pci_register_driver(&ibmasm_driver); + if (result) { + ibmasmfs_unregister(); + return result; + } + ibmasm_register_panic_notifier(); + info(DRIVER_DESC " version " DRIVER_VERSION " loaded"); + return 0; +} + +module_init(ibmasm_init); +module_exit(ibmasm_exit); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ibmasm_pci_table); + diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c new file mode 100644 index 00000000000..2de487ac788 --- /dev/null +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -0,0 +1,99 @@ + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/sched.h> +#include "ibmasm.h" +#include "dot_command.h" + +/* + * Reverse Heartbeat, i.e. heartbeats sent from the driver to the + * service processor. + * These heartbeats are initiated by user level programs. + */ + +/* the reverse heartbeat dot command */ +#pragma pack(1) +static struct { + struct dot_command_header header; + unsigned char command[3]; +} rhb_dot_cmd = { + .header = { + .type = sp_read, + .command_size = 3, + .data_size = 0, + .status = 0 + }, + .command = { 4, 3, 6 } +}; +#pragma pack() + +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + init_waitqueue_head(&rhb->wait); + rhb->stopped = 0; +} + +/** + * start_reverse_heartbeat + * Loop forever, sending a reverse heartbeat dot command to the service + * processor, then sleeping. The loop comes to an end if the service + * processor fails to respond 3 times or we were interrupted. 
+ */ +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + struct command *cmd; + int times_failed = 0; + int result = 1; + + cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd); + if (!cmd) + return -ENOMEM; + + while (times_failed < 3) { + memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd); + cmd->status = IBMASM_CMD_PENDING; + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + times_failed++; + + wait_event_interruptible_timeout(rhb->wait, + rhb->stopped, + REVERSE_HEARTBEAT_TIMEOUT * HZ); + + if (signal_pending(current) || rhb->stopped) { + result = -EINTR; + break; + } + } + command_put(cmd); + rhb->stopped = 0; + + return result; +} + +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb) +{ + rhb->stopped = 1; + wake_up_interruptible(&rhb->wait); +} diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c new file mode 100644 index 00000000000..477bb43c899 --- /dev/null +++ b/drivers/misc/ibmasm/remote.c @@ -0,0 +1,282 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Authors: Max Asböck <amax@us.ibm.com> + * Vernon Mauery <vernux@us.ibm.com> + * + */ + +/* Remote mouse and keyboard event handling functions */ + +#include <linux/pci.h> +#include "ibmasm.h" +#include "remote.h" + +#define MOUSE_X_MAX 1600 +#define MOUSE_Y_MAX 1200 + +static const unsigned short xlate_high[XLATE_SIZE] = { + [KEY_SYM_ENTER & 0xff] = KEY_ENTER, + [KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH, + [KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK, + [KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS, + [KEY_SYM_KPDOT & 0xff] = KEY_KPDOT, + [KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS, + [KEY_SYM_KP0 & 0xff] = KEY_KP0, + [KEY_SYM_KP1 & 0xff] = KEY_KP1, + [KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2, + [KEY_SYM_KP3 & 0xff] = KEY_KP3, + [KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4, + [KEY_SYM_KP5 & 0xff] = KEY_KP5, + [KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6, + [KEY_SYM_KP7 & 0xff] = KEY_KP7, + [KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8, + [KEY_SYM_KP9 & 0xff] = KEY_KP9, + [KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE, + [KEY_SYM_TAB & 0xff] = KEY_TAB, + [KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL, + [KEY_SYM_ALT & 0xff] = KEY_LEFTALT, + [KEY_SYM_INSERT & 0xff] = KEY_INSERT, + [KEY_SYM_DELETE & 0xff] = KEY_DELETE, + [KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT, + [KEY_SYM_UARROW & 0xff] = KEY_UP, + [KEY_SYM_DARROW & 0xff] = KEY_DOWN, + [KEY_SYM_LARROW & 0xff] = KEY_LEFT, + [KEY_SYM_RARROW & 0xff] = KEY_RIGHT, + [KEY_SYM_ESCAPE & 0xff] = KEY_ESC, + [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP, + [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN, + [KEY_SYM_HOME & 0xff] = KEY_HOME, + [KEY_SYM_END & 0xff] = KEY_END, + [KEY_SYM_F1 & 0xff] = KEY_F1, + [KEY_SYM_F2 & 0xff] = KEY_F2, + [KEY_SYM_F3 & 0xff] = KEY_F3, + [KEY_SYM_F4 & 0xff] = KEY_F4, + [KEY_SYM_F5 & 0xff] = KEY_F5, + [KEY_SYM_F6 & 0xff] = KEY_F6, + [KEY_SYM_F7 & 0xff] = KEY_F7, + [KEY_SYM_F8 & 0xff] = KEY_F8, + [KEY_SYM_F9 & 0xff] = 
KEY_F9, + [KEY_SYM_F10 & 0xff] = KEY_F10, + [KEY_SYM_F11 & 0xff] = KEY_F11, + [KEY_SYM_F12 & 0xff] = KEY_F12, + [KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK, + [KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK, + [KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK, +}; + +static const unsigned short xlate[XLATE_SIZE] = { + [NO_KEYCODE] = KEY_RESERVED, + [KEY_SYM_SPACE] = KEY_SPACE, + [KEY_SYM_TILDE] = KEY_GRAVE, [KEY_SYM_BKTIC] = KEY_GRAVE, + [KEY_SYM_ONE] = KEY_1, [KEY_SYM_BANG] = KEY_1, + [KEY_SYM_TWO] = KEY_2, [KEY_SYM_AT] = KEY_2, + [KEY_SYM_THREE] = KEY_3, [KEY_SYM_POUND] = KEY_3, + [KEY_SYM_FOUR] = KEY_4, [KEY_SYM_DOLLAR] = KEY_4, + [KEY_SYM_FIVE] = KEY_5, [KEY_SYM_PERCENT] = KEY_5, + [KEY_SYM_SIX] = KEY_6, [KEY_SYM_CARAT] = KEY_6, + [KEY_SYM_SEVEN] = KEY_7, [KEY_SYM_AMPER] = KEY_7, + [KEY_SYM_EIGHT] = KEY_8, [KEY_SYM_STAR] = KEY_8, + [KEY_SYM_NINE] = KEY_9, [KEY_SYM_LPAREN] = KEY_9, + [KEY_SYM_ZERO] = KEY_0, [KEY_SYM_RPAREN] = KEY_0, + [KEY_SYM_MINUS] = KEY_MINUS, [KEY_SYM_USCORE] = KEY_MINUS, + [KEY_SYM_EQUAL] = KEY_EQUAL, [KEY_SYM_PLUS] = KEY_EQUAL, + [KEY_SYM_LBRKT] = KEY_LEFTBRACE, [KEY_SYM_LCURLY] = KEY_LEFTBRACE, + [KEY_SYM_RBRKT] = KEY_RIGHTBRACE, [KEY_SYM_RCURLY] = KEY_RIGHTBRACE, + [KEY_SYM_SLASH] = KEY_BACKSLASH, [KEY_SYM_PIPE] = KEY_BACKSLASH, + [KEY_SYM_TIC] = KEY_APOSTROPHE, [KEY_SYM_QUOTE] = KEY_APOSTROPHE, + [KEY_SYM_SEMIC] = KEY_SEMICOLON, [KEY_SYM_COLON] = KEY_SEMICOLON, + [KEY_SYM_COMMA] = KEY_COMMA, [KEY_SYM_LT] = KEY_COMMA, + [KEY_SYM_PERIOD] = KEY_DOT, [KEY_SYM_GT] = KEY_DOT, + [KEY_SYM_BSLASH] = KEY_SLASH, [KEY_SYM_QMARK] = KEY_SLASH, + [KEY_SYM_A] = KEY_A, [KEY_SYM_a] = KEY_A, + [KEY_SYM_B] = KEY_B, [KEY_SYM_b] = KEY_B, + [KEY_SYM_C] = KEY_C, [KEY_SYM_c] = KEY_C, + [KEY_SYM_D] = KEY_D, [KEY_SYM_d] = KEY_D, + [KEY_SYM_E] = KEY_E, [KEY_SYM_e] = KEY_E, + [KEY_SYM_F] = KEY_F, [KEY_SYM_f] = KEY_F, + [KEY_SYM_G] = KEY_G, [KEY_SYM_g] = KEY_G, + [KEY_SYM_H] = KEY_H, [KEY_SYM_h] = KEY_H, + [KEY_SYM_I] = KEY_I, [KEY_SYM_i] = KEY_I, + [KEY_SYM_J] = KEY_J, [KEY_SYM_j] = 
KEY_J, + [KEY_SYM_K] = KEY_K, [KEY_SYM_k] = KEY_K, + [KEY_SYM_L] = KEY_L, [KEY_SYM_l] = KEY_L, + [KEY_SYM_M] = KEY_M, [KEY_SYM_m] = KEY_M, + [KEY_SYM_N] = KEY_N, [KEY_SYM_n] = KEY_N, + [KEY_SYM_O] = KEY_O, [KEY_SYM_o] = KEY_O, + [KEY_SYM_P] = KEY_P, [KEY_SYM_p] = KEY_P, + [KEY_SYM_Q] = KEY_Q, [KEY_SYM_q] = KEY_Q, + [KEY_SYM_R] = KEY_R, [KEY_SYM_r] = KEY_R, + [KEY_SYM_S] = KEY_S, [KEY_SYM_s] = KEY_S, + [KEY_SYM_T] = KEY_T, [KEY_SYM_t] = KEY_T, + [KEY_SYM_U] = KEY_U, [KEY_SYM_u] = KEY_U, + [KEY_SYM_V] = KEY_V, [KEY_SYM_v] = KEY_V, + [KEY_SYM_W] = KEY_W, [KEY_SYM_w] = KEY_W, + [KEY_SYM_X] = KEY_X, [KEY_SYM_x] = KEY_X, + [KEY_SYM_Y] = KEY_Y, [KEY_SYM_y] = KEY_Y, + [KEY_SYM_Z] = KEY_Z, [KEY_SYM_z] = KEY_Z, +}; + +static void print_input(struct remote_input *input) +{ + if (input->type == INPUT_TYPE_MOUSE) { + unsigned char buttons = input->mouse_buttons; + dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n", + input->data.mouse.x, input->data.mouse.y, + (buttons) ? " -- buttons:" : "", + (buttons & REMOTE_BUTTON_LEFT) ? "left " : "", + (buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "", + (buttons & REMOTE_BUTTON_RIGHT) ? 
"right" : "" + ); + } else { + dbg("remote keypress (code, flag, down):" + "%d (0x%x) [0x%x] [0x%x]\n", + input->data.keyboard.key_code, + input->data.keyboard.key_code, + input->data.keyboard.key_flag, + input->data.keyboard.key_down + ); + } +} + +static void send_mouse_event(struct input_dev *dev, struct remote_input *input) +{ + unsigned char buttons = input->mouse_buttons; + + input_report_abs(dev, ABS_X, input->data.mouse.x); + input_report_abs(dev, ABS_Y, input->data.mouse.y); + input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT); + input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE); + input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT); + input_sync(dev); +} + +static void send_keyboard_event(struct input_dev *dev, + struct remote_input *input) +{ + unsigned int key; + unsigned short code = input->data.keyboard.key_code; + + if (code & 0xff00) + key = xlate_high[code & 0xff]; + else + key = xlate[code]; + input_report_key(dev, key, input->data.keyboard.key_down); + input_sync(dev); +} + +void ibmasm_handle_mouse_interrupt(struct service_processor *sp) +{ + unsigned long reader; + unsigned long writer; + struct remote_input input; + + reader = get_queue_reader(sp); + writer = get_queue_writer(sp); + + while (reader != writer) { + memcpy_fromio(&input, get_queue_entry(sp, reader), + sizeof(struct remote_input)); + + print_input(&input); + if (input.type == INPUT_TYPE_MOUSE) { + send_mouse_event(sp->remote.mouse_dev, &input); + } else if (input.type == INPUT_TYPE_KEYBOARD) { + send_keyboard_event(sp->remote.keybd_dev, &input); + } else + break; + + reader = advance_queue_reader(sp, reader); + writer = get_queue_writer(sp); + } +} + +int ibmasm_init_remote_input_dev(struct service_processor *sp) +{ + /* set up the mouse input device */ + struct input_dev *mouse_dev, *keybd_dev; + struct pci_dev *pdev = to_pci_dev(sp->dev); + int error = -ENOMEM; + int i; + + sp->remote.mouse_dev = mouse_dev = input_allocate_device(); + 
sp->remote.keybd_dev = keybd_dev = input_allocate_device(); + + if (!mouse_dev || !keybd_dev) + goto err_free_devices; + + mouse_dev->id.bustype = BUS_PCI; + mouse_dev->id.vendor = pdev->vendor; + mouse_dev->id.product = pdev->device; + mouse_dev->id.version = 1; + mouse_dev->dev.parent = sp->dev; + mouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | + BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE); + set_bit(BTN_TOUCH, mouse_dev->keybit); + mouse_dev->name = "ibmasm RSA I remote mouse"; + input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0); + input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0); + + keybd_dev->id.bustype = BUS_PCI; + keybd_dev->id.vendor = pdev->vendor; + keybd_dev->id.product = pdev->device; + keybd_dev->id.version = 2; + keybd_dev->dev.parent = sp->dev; + keybd_dev->evbit[0] = BIT_MASK(EV_KEY); + keybd_dev->name = "ibmasm RSA I remote keyboard"; + + for (i = 0; i < XLATE_SIZE; i++) { + if (xlate_high[i]) + set_bit(xlate_high[i], keybd_dev->keybit); + if (xlate[i]) + set_bit(xlate[i], keybd_dev->keybit); + } + + error = input_register_device(mouse_dev); + if (error) + goto err_free_devices; + + error = input_register_device(keybd_dev); + if (error) + goto err_unregister_mouse_dev; + + enable_mouse_interrupts(sp); + + printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number); + + return 0; + + err_unregister_mouse_dev: + input_unregister_device(mouse_dev); + mouse_dev = NULL; /* so we don't try to free it again below */ + err_free_devices: + input_free_device(mouse_dev); + input_free_device(keybd_dev); + + return error; +} + +void ibmasm_free_remote_input_dev(struct service_processor *sp) +{ + disable_mouse_interrupts(sp); + input_unregister_device(sp->remote.mouse_dev); + input_unregister_device(sp->remote.keybd_dev); +} + diff --git a/drivers/misc/ibmasm/remote.h b/drivers/misc/ibmasm/remote.h new file mode 100644 index 
00000000000..72acf5af7a2 --- /dev/null +++ b/drivers/misc/ibmasm/remote.h @@ -0,0 +1,270 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + * Orignally written by Pete Reynolds + */ + +#ifndef _IBMASM_REMOTE_H_ +#define _IBMASM_REMOTE_H_ + +#include <asm/io.h> + +/* pci offsets */ +#define CONDOR_MOUSE_DATA 0x000AC000 +#define CONDOR_MOUSE_ISR_CONTROL 0x00 +#define CONDOR_MOUSE_ISR_STATUS 0x04 +#define CONDOR_MOUSE_Q_READER 0x08 +#define CONDOR_MOUSE_Q_WRITER 0x0C +#define CONDOR_MOUSE_Q_BEGIN 0x10 +#define CONDOR_MOUSE_MAX_X 0x14 +#define CONDOR_MOUSE_MAX_Y 0x18 + +#define CONDOR_INPUT_DESKTOP_INFO 0x1F0 +#define CONDOR_INPUT_DISPLAY_RESX 0x1F4 +#define CONDOR_INPUT_DISPLAY_RESY 0x1F8 +#define CONDOR_INPUT_DISPLAY_BITS 0x1FC +#define CONDOR_OUTPUT_VNC_STATUS 0x200 + +#define CONDOR_MOUSE_INTR_STATUS_MASK 0x00000001 + +#define INPUT_TYPE_MOUSE 0x1 +#define INPUT_TYPE_KEYBOARD 0x2 + + +/* mouse button states received from SP */ +#define REMOTE_DOUBLE_CLICK 0xF0 +#define REMOTE_BUTTON_LEFT 0x01 +#define REMOTE_BUTTON_MIDDLE 0x02 +#define REMOTE_BUTTON_RIGHT 0x04 + +/* size of keysym/keycode translation matricies */ +#define XLATE_SIZE 256 + +struct 
mouse_input { + unsigned short y; + unsigned short x; +}; + + +struct keyboard_input { + unsigned short key_code; + unsigned char key_flag; + unsigned char key_down; +}; + + + +struct remote_input { + union { + struct mouse_input mouse; + struct keyboard_input keyboard; + } data; + + unsigned char type; + unsigned char pad1; + unsigned char mouse_buttons; + unsigned char pad3; +}; + +#define mouse_addr(sp) (sp->base_address + CONDOR_MOUSE_DATA) +#define display_width(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX) +#define display_height(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY) +#define display_depth(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS) +#define desktop_info(sp) (mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO) +#define vnc_status(sp) (mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS) +#define isr_control(sp) (mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +#define mouse_interrupt_pending(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define clear_mouse_interrupt(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define enable_mouse_interrupts(sp) writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) +#define disable_mouse_interrupts(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +/* remote input queue operations */ +#define REMOTE_QUEUE_SIZE 60 + +#define get_queue_writer(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER) +#define get_queue_reader(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER) +#define set_queue_reader(sp, reader) writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER) + +#define queue_begin (mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN) + +#define get_queue_entry(sp, read_index) \ + ((void*)(queue_begin + read_index * sizeof(struct remote_input))) + +static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader) +{ + reader++; + if (reader == REMOTE_QUEUE_SIZE) + reader = 0; + + set_queue_reader(sp, reader); + return reader; +} + +#define NO_KEYCODE 0 +#define KEY_SYM_BK_SPC 0xFF08 +#define 
KEY_SYM_TAB 0xFF09 +#define KEY_SYM_ENTER 0xFF0D +#define KEY_SYM_SCR_LOCK 0xFF14 +#define KEY_SYM_ESCAPE 0xFF1B +#define KEY_SYM_HOME 0xFF50 +#define KEY_SYM_LARROW 0xFF51 +#define KEY_SYM_UARROW 0xFF52 +#define KEY_SYM_RARROW 0xFF53 +#define KEY_SYM_DARROW 0xFF54 +#define KEY_SYM_PAGEUP 0xFF55 +#define KEY_SYM_PAGEDOWN 0xFF56 +#define KEY_SYM_END 0xFF57 +#define KEY_SYM_INSERT 0xFF63 +#define KEY_SYM_NUM_LOCK 0xFF7F +#define KEY_SYM_KPSTAR 0xFFAA +#define KEY_SYM_KPPLUS 0xFFAB +#define KEY_SYM_KPMINUS 0xFFAD +#define KEY_SYM_KPDOT 0xFFAE +#define KEY_SYM_KPSLASH 0xFFAF +#define KEY_SYM_KPRIGHT 0xFF96 +#define KEY_SYM_KPUP 0xFF97 +#define KEY_SYM_KPLEFT 0xFF98 +#define KEY_SYM_KPDOWN 0xFF99 +#define KEY_SYM_KP0 0xFFB0 +#define KEY_SYM_KP1 0xFFB1 +#define KEY_SYM_KP2 0xFFB2 +#define KEY_SYM_KP3 0xFFB3 +#define KEY_SYM_KP4 0xFFB4 +#define KEY_SYM_KP5 0xFFB5 +#define KEY_SYM_KP6 0xFFB6 +#define KEY_SYM_KP7 0xFFB7 +#define KEY_SYM_KP8 0xFFB8 +#define KEY_SYM_KP9 0xFFB9 +#define KEY_SYM_F1 0xFFBE // 1B 5B 5B 41 +#define KEY_SYM_F2 0xFFBF // 1B 5B 5B 42 +#define KEY_SYM_F3 0xFFC0 // 1B 5B 5B 43 +#define KEY_SYM_F4 0xFFC1 // 1B 5B 5B 44 +#define KEY_SYM_F5 0xFFC2 // 1B 5B 5B 45 +#define KEY_SYM_F6 0xFFC3 // 1B 5B 31 37 7E +#define KEY_SYM_F7 0xFFC4 // 1B 5B 31 38 7E +#define KEY_SYM_F8 0xFFC5 // 1B 5B 31 39 7E +#define KEY_SYM_F9 0xFFC6 // 1B 5B 32 30 7E +#define KEY_SYM_F10 0xFFC7 // 1B 5B 32 31 7E +#define KEY_SYM_F11 0xFFC8 // 1B 5B 32 33 7E +#define KEY_SYM_F12 0xFFC9 // 1B 5B 32 34 7E +#define KEY_SYM_SHIFT 0xFFE1 +#define KEY_SYM_CTRL 0xFFE3 +#define KEY_SYM_ALT 0xFFE9 +#define KEY_SYM_CAP_LOCK 0xFFE5 +#define KEY_SYM_DELETE 0xFFFF +#define KEY_SYM_TILDE 0x60 +#define KEY_SYM_BKTIC 0x7E +#define KEY_SYM_ONE 0x31 +#define KEY_SYM_BANG 0x21 +#define KEY_SYM_TWO 0x32 +#define KEY_SYM_AT 0x40 +#define KEY_SYM_THREE 0x33 +#define KEY_SYM_POUND 0x23 +#define KEY_SYM_FOUR 0x34 +#define KEY_SYM_DOLLAR 0x24 +#define KEY_SYM_FIVE 0x35 +#define KEY_SYM_PERCENT 0x25 +#define 
KEY_SYM_SIX 0x36 +#define KEY_SYM_CARAT 0x5E +#define KEY_SYM_SEVEN 0x37 +#define KEY_SYM_AMPER 0x26 +#define KEY_SYM_EIGHT 0x38 +#define KEY_SYM_STAR 0x2A +#define KEY_SYM_NINE 0x39 +#define KEY_SYM_LPAREN 0x28 +#define KEY_SYM_ZERO 0x30 +#define KEY_SYM_RPAREN 0x29 +#define KEY_SYM_MINUS 0x2D +#define KEY_SYM_USCORE 0x5F +#define KEY_SYM_EQUAL 0x2B +#define KEY_SYM_PLUS 0x3D +#define KEY_SYM_LBRKT 0x5B +#define KEY_SYM_LCURLY 0x7B +#define KEY_SYM_RBRKT 0x5D +#define KEY_SYM_RCURLY 0x7D +#define KEY_SYM_SLASH 0x5C +#define KEY_SYM_PIPE 0x7C +#define KEY_SYM_TIC 0x27 +#define KEY_SYM_QUOTE 0x22 +#define KEY_SYM_SEMIC 0x3B +#define KEY_SYM_COLON 0x3A +#define KEY_SYM_COMMA 0x2C +#define KEY_SYM_LT 0x3C +#define KEY_SYM_PERIOD 0x2E +#define KEY_SYM_GT 0x3E +#define KEY_SYM_BSLASH 0x2F +#define KEY_SYM_QMARK 0x3F +#define KEY_SYM_A 0x41 +#define KEY_SYM_B 0x42 +#define KEY_SYM_C 0x43 +#define KEY_SYM_D 0x44 +#define KEY_SYM_E 0x45 +#define KEY_SYM_F 0x46 +#define KEY_SYM_G 0x47 +#define KEY_SYM_H 0x48 +#define KEY_SYM_I 0x49 +#define KEY_SYM_J 0x4A +#define KEY_SYM_K 0x4B +#define KEY_SYM_L 0x4C +#define KEY_SYM_M 0x4D +#define KEY_SYM_N 0x4E +#define KEY_SYM_O 0x4F +#define KEY_SYM_P 0x50 +#define KEY_SYM_Q 0x51 +#define KEY_SYM_R 0x52 +#define KEY_SYM_S 0x53 +#define KEY_SYM_T 0x54 +#define KEY_SYM_U 0x55 +#define KEY_SYM_V 0x56 +#define KEY_SYM_W 0x57 +#define KEY_SYM_X 0x58 +#define KEY_SYM_Y 0x59 +#define KEY_SYM_Z 0x5A +#define KEY_SYM_a 0x61 +#define KEY_SYM_b 0x62 +#define KEY_SYM_c 0x63 +#define KEY_SYM_d 0x64 +#define KEY_SYM_e 0x65 +#define KEY_SYM_f 0x66 +#define KEY_SYM_g 0x67 +#define KEY_SYM_h 0x68 +#define KEY_SYM_i 0x69 +#define KEY_SYM_j 0x6A +#define KEY_SYM_k 0x6B +#define KEY_SYM_l 0x6C +#define KEY_SYM_m 0x6D +#define KEY_SYM_n 0x6E +#define KEY_SYM_o 0x6F +#define KEY_SYM_p 0x70 +#define KEY_SYM_q 0x71 +#define KEY_SYM_r 0x72 +#define KEY_SYM_s 0x73 +#define KEY_SYM_t 0x74 +#define KEY_SYM_u 0x75 +#define KEY_SYM_v 0x76 +#define KEY_SYM_w 0x77 
+#define KEY_SYM_x 0x78 +#define KEY_SYM_y 0x79 +#define KEY_SYM_z 0x7A +#define KEY_SYM_SPACE 0x20 +#endif /* _IBMASM_REMOTE_H_ */ diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c new file mode 100644 index 00000000000..93baa350d69 --- /dev/null +++ b/drivers/misc/ibmasm/uart.c @@ -0,0 +1,72 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/termios.h> +#include <linux/tty.h> +#include <linux/serial_core.h> +#include <linux/serial_reg.h> +#include <linux/serial_8250.h> +#include "ibmasm.h" +#include "lowlevel.h" + + +void ibmasm_register_uart(struct service_processor *sp) +{ + struct uart_port uport; + void __iomem *iomem_base; + + iomem_base = sp->base_address + SCOUT_COM_B_BASE; + + /* read the uart scratch register to determine if the UART + * is dedicated to the service processor or if the OS can use it + */ + if (0 == readl(iomem_base + UART_SCR)) { + dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n"); + sp->serial_line = -1; + return; + } + + memset(&uport, 0, sizeof(struct uart_port)); + uport.irq = sp->irq; + uport.uartclk = 3686400; + uport.flags = UPF_SHARE_IRQ; + uport.iotype = UPIO_MEM; + uport.membase = iomem_base; + + sp->serial_line = serial8250_register_port(&uport); + if (sp->serial_line < 0) { + dev_err(sp->dev, "Failed to register serial port\n"); + return; + } + enable_uart_interrupts(sp->base_address); +} + +void ibmasm_unregister_uart(struct service_processor *sp) +{ + if (sp->serial_line < 0) + return; + + disable_uart_interrupts(sp->base_address); + serial8250_unregister_port(sp->serial_line); +} diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c new file mode 100644 index 00000000000..395a4ea64e9 --- /dev/null +++ b/drivers/misc/ics932s401.c @@ -0,0 +1,505 @@ +/* + * A driver for the Integrated Circuits ICS932S401 + * Copyright (C) 2008 IBM + * + * Author: Darrick J. Wong <djwong@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/log2.h> + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; + +/* ICS932S401 registers */ +#define ICS932S401_REG_CFG2 0x01 +#define ICS932S401_CFG1_SPREAD 0x01 +#define ICS932S401_REG_CFG7 0x06 +#define ICS932S401_FS_MASK 0x07 +#define ICS932S401_REG_VENDOR_REV 0x07 +#define ICS932S401_VENDOR 1 +#define ICS932S401_VENDOR_MASK 0x0F +#define ICS932S401_REV 4 +#define ICS932S401_REV_SHIFT 4 +#define ICS932S401_REG_DEVICE 0x09 +#define ICS932S401_DEVICE 11 +#define ICS932S401_REG_CTRL 0x0A +#define ICS932S401_MN_ENABLED 0x80 +#define ICS932S401_CPU_ALT 0x04 +#define ICS932S401_SRC_ALT 0x08 +#define ICS932S401_REG_CPU_M_CTRL 0x0B +#define ICS932S401_M_MASK 0x3F +#define ICS932S401_REG_CPU_N_CTRL 0x0C +#define ICS932S401_REG_CPU_SPREAD1 0x0D +#define ICS932S401_REG_CPU_SPREAD2 0x0E +#define ICS932S401_SPREAD_MASK 0x7FFF +#define ICS932S401_REG_SRC_M_CTRL 0x0F +#define ICS932S401_REG_SRC_N_CTRL 0x10 +#define ICS932S401_REG_SRC_SPREAD1 0x11 +#define ICS932S401_REG_SRC_SPREAD2 0x12 +#define ICS932S401_REG_CPU_DIVISOR 0x13 +#define ICS932S401_CPU_DIVISOR_SHIFT 4 +#define ICS932S401_REG_PCISRC_DIVISOR 0x14 +#define ICS932S401_SRC_DIVISOR_MASK 0x0F +#define ICS932S401_PCI_DIVISOR_SHIFT 4 + +/* Base clock is 14.318MHz */ +#define BASE_CLOCK 14318 + +#define NUM_REGS 21 +#define NUM_MIRRORED_REGS 15 
+ +static int regs_to_copy[NUM_MIRRORED_REGS] = { + ICS932S401_REG_CFG2, + ICS932S401_REG_CFG7, + ICS932S401_REG_VENDOR_REV, + ICS932S401_REG_DEVICE, + ICS932S401_REG_CTRL, + ICS932S401_REG_CPU_M_CTRL, + ICS932S401_REG_CPU_N_CTRL, + ICS932S401_REG_CPU_SPREAD1, + ICS932S401_REG_CPU_SPREAD2, + ICS932S401_REG_SRC_M_CTRL, + ICS932S401_REG_SRC_N_CTRL, + ICS932S401_REG_SRC_SPREAD1, + ICS932S401_REG_SRC_SPREAD2, + ICS932S401_REG_CPU_DIVISOR, + ICS932S401_REG_PCISRC_DIVISOR, +}; + +/* How often do we reread sensors values? (In jiffies) */ +#define SENSOR_REFRESH_INTERVAL (2 * HZ) + +/* How often do we reread sensor limit values? (In jiffies) */ +#define LIMIT_REFRESH_INTERVAL (60 * HZ) + +struct ics932s401_data { + struct attribute_group attrs; + struct mutex lock; + char sensors_valid; + unsigned long sensors_last_updated; /* In jiffies */ + + u8 regs[NUM_REGS]; +}; + +static int ics932s401_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info); +static int ics932s401_remove(struct i2c_client *client); + +static const struct i2c_device_id ics932s401_id[] = { + { "ics932s401", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ics932s401_id); + +static struct i2c_driver ics932s401_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "ics932s401", + }, + .probe = ics932s401_probe, + .remove = ics932s401_remove, + .id_table = ics932s401_id, + .detect = ics932s401_detect, + .address_list = normal_i2c, +}; + +static struct ics932s401_data *ics932s401_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ics932s401_data *data = i2c_get_clientdata(client); + unsigned long local_jiffies = jiffies; + int i, temp; + + mutex_lock(&data->lock); + if (time_before(local_jiffies, data->sensors_last_updated + + SENSOR_REFRESH_INTERVAL) + && data->sensors_valid) + goto out; + + /* + * Each register must be read as a word and then right shifted 8 
bits. + * Not really sure why this is; setting the "byte count programming" + * register to 1 does not fix this problem. + */ + for (i = 0; i < NUM_MIRRORED_REGS; i++) { + temp = i2c_smbus_read_word_data(client, regs_to_copy[i]); + data->regs[regs_to_copy[i]] = temp >> 8; + } + + data->sensors_last_updated = local_jiffies; + data->sensors_valid = 1; + +out: + mutex_unlock(&data->lock); + return data; +} + +static ssize_t show_spread_enabled(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD) + return sprintf(buf, "1\n"); + + return sprintf(buf, "0\n"); +} + +/* bit to cpu khz map */ +static const int fs_speeds[] = { + 266666, + 133333, + 200000, + 166666, + 333333, + 100000, + 400000, + 0, +}; + +/* clock divisor map */ +static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16, + 24, 40, 120}; + +/* Calculate CPU frequency from the M/N registers. 
*/ +static int calculate_cpu_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_CPU_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >> + ICS932S401_CPU_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_cpu_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_cpu_freq(data)); +} + +static ssize_t show_cpu_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_cpu_freq(data); + else { + /* Freq is neatly wrapped up for us */ + int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK; + freq = fs_speeds[fid]; + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) { + switch (freq) { + case 166666: + freq = 160000; + break; + case 333333: + freq = 320000; + break; + } + } + } + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate SRC frequency from the M/N registers. 
*/ +static int calculate_src_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] & + ICS932S401_SRC_DIVISOR_MASK]; + + return freq; +} + +static ssize_t show_src_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_src_freq(data)); +} + +static ssize_t show_src_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_src_freq(data); + else + /* Freq is neatly wrapped up for us */ + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT && + data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT) + freq = 96000; + else + freq = 100000; + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate PCI frequency from the SRC M/N registers. 
*/ +static int calculate_pci_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >> + ICS932S401_PCI_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_pci_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_pci_freq(data)); +} + +static ssize_t show_pci_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_pci_freq(data); + else + freq = 33333; + + return sprintf(buf, "%d\n", freq); +} + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL); +static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL); +static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL); +static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL); +static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL); +static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL); +static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL); +static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL); +static 
DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL); + +static struct attribute *ics932s401_attr[] = +{ + &dev_attr_spread_enabled.attr, + &dev_attr_cpu_clock_selection.attr, + &dev_attr_cpu_clock.attr, + &dev_attr_src_clock_selection.attr, + &dev_attr_src_clock.attr, + &dev_attr_pci_clock_selection.attr, + &dev_attr_pci_clock.attr, + &dev_attr_usb_clock.attr, + &dev_attr_ref_clock.attr, + &dev_attr_cpu_spread.attr, + &dev_attr_src_spread.attr, + NULL +}; + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + int x; + + if (devattr == &dev_attr_usb_clock) + x = 48000; + else if (devattr == &dev_attr_ref_clock) + x = BASE_CLOCK; + else + BUG(); + + return sprintf(buf, "%d\n", x); +} + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int reg; + unsigned long val; + + if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)) + return sprintf(buf, "0%%\n"); + + if (devattr == &dev_attr_src_spread) + reg = ICS932S401_REG_SRC_SPREAD1; + else if (devattr == &dev_attr_cpu_spread) + reg = ICS932S401_REG_CPU_SPREAD1; + else + BUG(); + + val = data->regs[reg] | (data->regs[reg + 1] << 8); + val &= ICS932S401_SPREAD_MASK; + + /* Scale 0..2^14 to -0.5. 
*/ + val = 500000 * val / 16384; + return sprintf(buf, "-0.%lu%%\n", val); +} + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + int vendor, device, revision; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV); + vendor >>= 8; + revision = vendor >> ICS932S401_REV_SHIFT; + vendor &= ICS932S401_VENDOR_MASK; + if (vendor != ICS932S401_VENDOR) + return -ENODEV; + + device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE); + device >>= 8; + if (device != ICS932S401_DEVICE) + return -ENODEV; + + if (revision != ICS932S401_REV) + dev_info(&adapter->dev, "Unknown revision %d\n", revision); + + strlcpy(info->type, "ics932s401", I2C_NAME_SIZE); + + return 0; +} + +static int ics932s401_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ics932s401_data *data; + int err; + + data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + dev_info(&client->dev, "%s chip found\n", client->name); + + /* Register sysfs hooks */ + data->attrs.attrs = ics932s401_attr; + err = sysfs_create_group(&client->dev.kobj, &data->attrs); + if (err) + goto exit_free; + + return 0; + +exit_free: + kfree(data); +exit: + return err; +} + +static int ics932s401_remove(struct i2c_client *client) +{ + struct ics932s401_data *data = i2c_get_clientdata(client); + + sysfs_remove_group(&client->dev.kobj, &data->attrs); + kfree(data); + return 0; +} + +static int __init ics932s401_init(void) +{ + return i2c_add_driver(&ics932s401_driver); +} + +static void __exit ics932s401_exit(void) +{ + i2c_del_driver(&ics932s401_driver); +} + +MODULE_AUTHOR("Darrick J. 
Wong <djwong@us.ibm.com>"); +MODULE_DESCRIPTION("ICS932S401 driver"); +MODULE_LICENSE("GPL"); + +module_init(ics932s401_init); +module_exit(ics932s401_exit); + +/* IBM IntelliStation Z30 */ +MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*"); +MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*"); + +/* IBM x3650/x3550 */ +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*"); +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*"); diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c new file mode 100644 index 00000000000..09dcb699e66 --- /dev/null +++ b/drivers/misc/ioc4.c @@ -0,0 +1,512 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005-2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* This file contains the master driver module for use by SGI IOC4 subdrivers. + * + * It allocates any resources shared between multiple subdevices, and + * provides accessor functions (where needed) and the like for those + * resources. It also provides a mechanism for the subdevice modules + * to support loading and unloading. + * + * Non-shared resources (e.g. external interrupt A_INT_OUT register page + * alias, serial port and UART registers) are handled by the subdevice + * modules themselves. + * + * This is all necessary because IOC4 is not implemented as a multi-function + * PCI device, but an amalgamation of disparate registers for several + * types of device (ATA, serial, external interrupts). The normal + * resource management in the kernel doesn't have quite the right interfaces + * to handle this situation (e.g. multiple modules can't claim the same + * PCI ID), thus this IOC4 master module. 
+ */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/ioc4.h> +#include <linux/ktime.h> +#include <linux/mutex.h> +#include <linux/time.h> +#include <asm/io.h> + +/*************** + * Definitions * + ***************/ + +/* Tweakable values */ + +/* PCI bus speed detection/calibration */ +#define IOC4_CALIBRATE_COUNT 63 /* Calibration cycle period */ +#define IOC4_CALIBRATE_CYCLES 256 /* Average over this many cycles */ +#define IOC4_CALIBRATE_DISCARD 2 /* Discard first few cycles */ +#define IOC4_CALIBRATE_LOW_MHZ 25 /* Lower bound on bus speed sanity */ +#define IOC4_CALIBRATE_HIGH_MHZ 75 /* Upper bound on bus speed sanity */ +#define IOC4_CALIBRATE_DEFAULT_MHZ 66 /* Assumed if sanity check fails */ + +/************************ + * Submodule management * + ************************/ + +static DEFINE_MUTEX(ioc4_mutex); + +static LIST_HEAD(ioc4_devices); +static LIST_HEAD(ioc4_submodules); + +/* Register an IOC4 submodule */ +int +ioc4_register_submodule(struct ioc4_submodule *is) +{ + struct ioc4_driver_data *idd; + + mutex_lock(&ioc4_mutex); + list_add(&is->is_list, &ioc4_submodules); + + /* Initialize submodule for each IOC4 */ + if (!is->is_probe) + goto out; + + list_for_each_entry(idd, &ioc4_devices, idd_list) { + if (is->is_probe(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule %s probe failed " + "for pci_dev %s", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + out: + mutex_unlock(&ioc4_mutex); + return 0; +} + +/* Unregister an IOC4 submodule */ +void +ioc4_unregister_submodule(struct ioc4_submodule *is) +{ + struct ioc4_driver_data *idd; + + mutex_lock(&ioc4_mutex); + list_del(&is->is_list); + + /* Remove submodule for each IOC4 */ + if (!is->is_remove) + goto out; + + list_for_each_entry(idd, &ioc4_devices, idd_list) { + if (is->is_remove(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule %s remove failed " + "for pci_dev %s.\n", + __func__, module_name(is->is_owner), + 
pci_name(idd->idd_pdev)); + } + } + out: + mutex_unlock(&ioc4_mutex); +} + +/********************* + * Device management * + *********************/ + +#define IOC4_CALIBRATE_LOW_LIMIT \ + (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ) +#define IOC4_CALIBRATE_HIGH_LIMIT \ + (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ) +#define IOC4_CALIBRATE_DEFAULT \ + (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ) + +#define IOC4_CALIBRATE_END \ + (IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD) + +#define IOC4_INT_OUT_MODE_TOGGLE 0x7 /* Toggle INT_OUT every COUNT+1 ticks */ + +/* Determines external interrupt output clock period of the PCI bus an + * IOC4 is attached to. This value can be used to determine the PCI + * bus speed. + * + * IOC4 has a design feature that various internal timers are derived from + * the PCI bus clock. This causes IOC4 device drivers to need to take the + * bus speed into account when setting various register values (e.g. INT_OUT + * register COUNT field, UART divisors, etc). Since this information is + * needed by several subdrivers, it is determined by the main IOC4 driver, + * even though the following code utilizes external interrupt registers + * to perform the speed calculation. 
+ */ +static void __devinit +ioc4_clock_calibrate(struct ioc4_driver_data *idd) +{ + union ioc4_int_out int_out; + union ioc4_gpcr gpcr; + unsigned int state, last_state = 1; + struct timespec start_ts, end_ts; + uint64_t start, end, period; + unsigned int count = 0; + + /* Enable output */ + gpcr.raw = 0; + gpcr.fields.dir = IOC4_GPCR_DIR_0; + gpcr.fields.int_out_en = 1; + writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw); + + /* Reset to power-on state */ + writel(0, &idd->idd_misc_regs->int_out.raw); + mmiowb(); + + /* Set up square wave */ + int_out.raw = 0; + int_out.fields.count = IOC4_CALIBRATE_COUNT; + int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE; + int_out.fields.diag = 0; + writel(int_out.raw, &idd->idd_misc_regs->int_out.raw); + mmiowb(); + + /* Check square wave period averaged over some number of cycles */ + do { + int_out.raw = readl(&idd->idd_misc_regs->int_out.raw); + state = int_out.fields.int_out; + if (!last_state && state) { + count++; + if (count == IOC4_CALIBRATE_END) { + ktime_get_ts(&end_ts); + break; + } else if (count == IOC4_CALIBRATE_DISCARD) + ktime_get_ts(&start_ts); + } + last_state = state; + } while (1); + + /* Calculation rearranged to preserve intermediate precision. + * Logically: + * 1. "end - start" gives us the measurement period over all + * the square wave cycles. + * 2. Divide by number of square wave cycles to get the period + * of a square wave cycle. + * 3. Divide by 2*(int_out.fields.count+1), which is the formula + * by which the IOC4 generates the square wave, to get the + * period of an IOC4 INT_OUT count. + */ + end = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec; + start = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec; + period = (end - start) / + (IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1)); + + /* Bounds check the result. */ + if (period > IOC4_CALIBRATE_LOW_LIMIT || + period < IOC4_CALIBRATE_HIGH_LIMIT) { + printk(KERN_INFO + "IOC4 %s: Clock calibration failed. 
Assuming" + "PCI clock is %d ns.\n", + pci_name(idd->idd_pdev), + IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR); + period = IOC4_CALIBRATE_DEFAULT; + } else { + u64 ns = period; + + do_div(ns, IOC4_EXTINT_COUNT_DIVISOR); + printk(KERN_DEBUG + "IOC4 %s: PCI clock is %llu ns.\n", + pci_name(idd->idd_pdev), (unsigned long long)ns); + } + + /* Remember results. We store the extint clock period rather + * than the PCI clock period so that greater precision is + * retained. Divide by IOC4_EXTINT_COUNT_DIVISOR to get + * PCI clock period. + */ + idd->count_period = period; +} + +/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT. + * Each brings out different combinations of IOC4 signals, thus. + * the IOC4 subdrivers need to know to which we're attached. + * + * We look for the presence of a SCSI (IO9) or SATA (IO10) controller + * on the same PCI bus at slot number 3 to differentiate IO9 from IO10. + * If neither is present, it's a PCI-RT. + */ +static unsigned int __devinit +ioc4_variant(struct ioc4_driver_data *idd) +{ + struct pci_dev *pdev = NULL; + int found = 0; + + /* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */ + do { + pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC, + PCI_DEVICE_ID_QLOGIC_ISP12160, pdev); + if (pdev && + idd->idd_pdev->bus->number == pdev->bus->number && + 3 == PCI_SLOT(pdev->devfn)) + found = 1; + } while (pdev && !found); + if (NULL != pdev) { + pci_dev_put(pdev); + return IOC4_VARIANT_IO9; + } + + /* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. 
*/ + pdev = NULL; + do { + pdev = pci_get_device(PCI_VENDOR_ID_VITESSE, + PCI_DEVICE_ID_VITESSE_VSC7174, pdev); + if (pdev && + idd->idd_pdev->bus->number == pdev->bus->number && + 3 == PCI_SLOT(pdev->devfn)) + found = 1; + } while (pdev && !found); + if (NULL != pdev) { + pci_dev_put(pdev); + return IOC4_VARIANT_IO10; + } + + /* PCI-RT: No SCSI/SATA controller will be present */ + return IOC4_VARIANT_PCI_RT; +} + +static void __devinit +ioc4_load_modules(struct work_struct *work) +{ + /* arg just has to be freed */ + + request_module("sgiioc4"); + + kfree(work); +} + +/* Adds a new instance of an IOC4 card */ +static int __devinit +ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) +{ + struct ioc4_driver_data *idd; + struct ioc4_submodule *is; + uint32_t pcmd; + int ret; + + /* Enable IOC4 and take ownership of it */ + if ((ret = pci_enable_device(pdev))) { + printk(KERN_WARNING + "%s: Failed to enable IOC4 device for pci_dev %s.\n", + __func__, pci_name(pdev)); + goto out; + } + pci_set_master(pdev); + + /* Set up per-IOC4 data */ + idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL); + if (!idd) { + printk(KERN_WARNING + "%s: Failed to allocate IOC4 data for pci_dev %s.\n", + __func__, pci_name(pdev)); + ret = -ENODEV; + goto out_idd; + } + idd->idd_pdev = pdev; + idd->idd_pci_id = pci_id; + + /* Map IOC4 misc registers. These are shared between subdevices + * so the main IOC4 module manages them. 
+ */ + idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0); + if (!idd->idd_bar0) { + printk(KERN_WARNING + "%s: Unable to find IOC4 misc resource " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_pci; + } + if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs), + "ioc4_misc")) { + printk(KERN_WARNING + "%s: Unable to request IOC4 misc region " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_pci; + } + idd->idd_misc_regs = ioremap(idd->idd_bar0, + sizeof(struct ioc4_misc_regs)); + if (!idd->idd_misc_regs) { + printk(KERN_WARNING + "%s: Unable to remap IOC4 misc region " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_misc_region; + } + + /* Failsafe portion of per-IOC4 initialization */ + + /* Detect card variant */ + idd->idd_variant = ioc4_variant(idd); + printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev), + idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" : + idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" : + idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown"); + + /* Initialize IOC4 */ + pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd); + pci_write_config_dword(idd->idd_pdev, PCI_COMMAND, + pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + + /* Determine PCI clock */ + ioc4_clock_calibrate(idd); + + /* Disable/clear all interrupts. Need to do this here lest + * one submodule request the shared IOC4 IRQ, but interrupt + * is generated by a different subdevice. + */ + /* Disable */ + writel(~0, &idd->idd_misc_regs->other_iec.raw); + writel(~0, &idd->idd_misc_regs->sio_iec); + /* Clear (i.e. 
acknowledge) */ + writel(~0, &idd->idd_misc_regs->other_ir.raw); + writel(~0, &idd->idd_misc_regs->sio_ir); + + /* Track PCI-device specific data */ + idd->idd_serial_data = NULL; + pci_set_drvdata(idd->idd_pdev, idd); + + mutex_lock(&ioc4_mutex); + list_add_tail(&idd->idd_list, &ioc4_devices); + + /* Add this IOC4 to all submodules */ + list_for_each_entry(is, &ioc4_submodules, is_list) { + if (is->is_probe && is->is_probe(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule 0x%s probe failed " + "for pci_dev %s.\n", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + mutex_unlock(&ioc4_mutex); + + /* Request sgiioc4 IDE driver on boards that bring that functionality + * off of IOC4. The root filesystem may be hosted on a drive connected + * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it + * won't be picked up by modprobes due to the ioc4 module owning the + * PCI device. + */ + if (idd->idd_variant != IOC4_VARIANT_PCI_RT) { + struct work_struct *work; + work = kzalloc(sizeof(struct work_struct), GFP_KERNEL); + if (!work) { + printk(KERN_WARNING + "%s: IOC4 unable to allocate memory for " + "load of sub-modules.\n", __func__); + } else { + /* Request the module from a work procedure as the + * modprobe goes out to a userland helper and that + * will hang if done directly from ioc4_probe(). + */ + printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n"); + INIT_WORK(work, ioc4_load_modules); + schedule_work(work); + } + } + + return 0; + +out_misc_region: + release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); +out_pci: + kfree(idd); +out_idd: + pci_disable_device(pdev); +out: + return ret; +} + +/* Removes a particular instance of an IOC4 card. 
*/ +static void __devexit +ioc4_remove(struct pci_dev *pdev) +{ + struct ioc4_submodule *is; + struct ioc4_driver_data *idd; + + idd = pci_get_drvdata(pdev); + + /* Remove this IOC4 from all submodules */ + mutex_lock(&ioc4_mutex); + list_for_each_entry(is, &ioc4_submodules, is_list) { + if (is->is_remove && is->is_remove(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule 0x%s remove failed " + "for pci_dev %s.\n", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + mutex_unlock(&ioc4_mutex); + + /* Release resources */ + iounmap(idd->idd_misc_regs); + if (!idd->idd_bar0) { + printk(KERN_WARNING + "%s: Unable to get IOC4 misc mapping for pci_dev %s. " + "Device removal may be incomplete.\n", + __func__, pci_name(idd->idd_pdev)); + } + release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); + + /* Disable IOC4 and relinquish */ + pci_disable_device(pdev); + + /* Remove and free driver data */ + mutex_lock(&ioc4_mutex); + list_del(&idd->idd_list); + mutex_unlock(&ioc4_mutex); + kfree(idd); +} + +static struct pci_device_id ioc4_id_table[] = { + {PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID, + PCI_ANY_ID, 0x0b4000, 0xFFFFFF}, + {0} +}; + +static struct pci_driver ioc4_driver = { + .name = "IOC4", + .id_table = ioc4_id_table, + .probe = ioc4_probe, + .remove = __devexit_p(ioc4_remove), +}; + +MODULE_DEVICE_TABLE(pci, ioc4_id_table); + +/********************* + * Module management * + *********************/ + +/* Module load */ +static int __init +ioc4_init(void) +{ + return pci_register_driver(&ioc4_driver); +} + +/* Module unload */ +static void __exit +ioc4_exit(void) +{ + /* Ensure ioc4_load_modules() has completed before exiting */ + flush_scheduled_work(); + pci_unregister_driver(&ioc4_driver); +} + +module_init(ioc4_init); +module_exit(ioc4_exit); + +MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. 
<bcasavan@sgi.com>"); +MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card"); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(ioc4_register_submodule); +EXPORT_SYMBOL(ioc4_unregister_submodule); diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c new file mode 100644 index 00000000000..a71e245801e --- /dev/null +++ b/drivers/misc/isl29003.c @@ -0,0 +1,475 @@ +/* + * isl29003.c - Linux kernel module for + * Intersil ISL29003 ambient light sensor + * + * See file:Documentation/misc-devices/isl29003 + * + * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de> + * + * Based on code written by + * Rodolfo Giometti <giometti@linux.it> + * Eurotech S.p.A. <info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/mutex.h> +#include <linux/delay.h> + +#define ISL29003_DRV_NAME "isl29003" +#define DRIVER_VERSION "1.0" + +#define ISL29003_REG_COMMAND 0x00 +#define ISL29003_ADC_ENABLED (1 << 7) +#define ISL29003_ADC_PD (1 << 6) +#define ISL29003_TIMING_INT (1 << 5) +#define ISL29003_MODE_SHIFT (2) +#define ISL29003_MODE_MASK (0x3 << ISL29003_MODE_SHIFT) +#define ISL29003_RES_SHIFT (0) +#define ISL29003_RES_MASK (0x3 << ISL29003_RES_SHIFT) + +#define ISL29003_REG_CONTROL 0x01 +#define ISL29003_INT_FLG (1 << 5) +#define ISL29003_RANGE_SHIFT (2) +#define ISL29003_RANGE_MASK (0x3 << ISL29003_RANGE_SHIFT) +#define ISL29003_INT_PERSISTS_SHIFT (0) +#define ISL29003_INT_PERSISTS_MASK (0xf << ISL29003_INT_PERSISTS_SHIFT) + +#define ISL29003_REG_IRQ_THRESH_HI 0x02 +#define ISL29003_REG_IRQ_THRESH_LO 0x03 +#define ISL29003_REG_LSB_SENSOR 0x04 +#define ISL29003_REG_MSB_SENSOR 0x05 +#define ISL29003_REG_LSB_TIMER 0x06 +#define ISL29003_REG_MSB_TIMER 0x07 + +#define ISL29003_NUM_CACHABLE_REGS 4 + +struct isl29003_data { + struct i2c_client *client; + struct mutex lock; + u8 reg_cache[ISL29003_NUM_CACHABLE_REGS]; + u8 power_state_before_suspend; +}; + +static int gain_range[] = { + 1000, 4000, 16000, 64000 +}; + +/* + * register access helpers + */ + +static int __isl29003_read_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + return (data->reg_cache[reg] & mask) >> shift; +} + +static int __isl29003_write_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift, u8 val) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int ret = 0; + u8 tmp; + + if (reg >= ISL29003_NUM_CACHABLE_REGS) + return -EINVAL; + + mutex_lock(&data->lock); + + tmp = data->reg_cache[reg]; + tmp &= ~mask; + tmp |= val << shift; + + ret = i2c_smbus_write_byte_data(client, reg, tmp); + if (!ret) + 
data->reg_cache[reg] = tmp; + + mutex_unlock(&data->lock); + return ret; +} + +/* + * internally used functions + */ + +/* range */ +static int isl29003_get_range(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT); +} + +static int isl29003_set_range(struct i2c_client *client, int range) +{ + return __isl29003_write_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range); +} + +/* resolution */ +static int isl29003_get_resolution(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_resolution(struct i2c_client *client, int res) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, res); +} + +/* mode */ +static int isl29003_get_mode(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_mode(struct i2c_client *client, int mode) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, mode); +} + +/* power_state */ +static int isl29003_set_power_state(struct i2c_client *client, int state) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0, + state ? 
ISL29003_ADC_ENABLED : ISL29003_ADC_PD); +} + +static int isl29003_get_power_state(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND]; + return ~cmdreg & ISL29003_ADC_PD; +} + +static int isl29003_get_adc_value(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int lsb, msb, range, bitdepth; + + mutex_lock(&data->lock); + lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR); + + if (lsb < 0) { + mutex_unlock(&data->lock); + return lsb; + } + + msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR); + mutex_unlock(&data->lock); + + if (msb < 0) + return msb; + + range = isl29003_get_range(client); + bitdepth = (4 - isl29003_get_resolution(client)) * 4; + return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth; +} + +/* + * sysfs layer + */ + +/* range */ +static ssize_t isl29003_show_range(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%i\n", isl29003_get_range(client)); +} + +static ssize_t isl29003_store_range(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 3)) + return -EINVAL; + + ret = isl29003_set_range(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(range, S_IWUSR | S_IRUGO, + isl29003_show_range, isl29003_store_range); + + +/* resolution */ +static ssize_t isl29003_show_resolution(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_resolution(client)); +} + +static ssize_t isl29003_store_resolution(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct 
i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 3)) + return -EINVAL; + + ret = isl29003_set_resolution(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO, + isl29003_show_resolution, isl29003_store_resolution); + +/* mode */ +static ssize_t isl29003_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_mode(client)); +} + +static ssize_t isl29003_store_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 2)) + return -EINVAL; + + ret = isl29003_set_mode(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, + isl29003_show_mode, isl29003_store_mode); + + +/* power state */ +static ssize_t isl29003_show_power_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_power_state(client)); +} + +static ssize_t isl29003_store_power_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 1)) + return -EINVAL; + + ret = isl29003_set_power_state(client, val); + return ret ? 
ret : count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + isl29003_show_power_state, isl29003_store_power_state); + + +/* lux */ +static ssize_t isl29003_show_lux(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + /* No LUX data if not operational */ + if (!isl29003_get_power_state(client)) + return -EBUSY; + + return sprintf(buf, "%d\n", isl29003_get_adc_value(client)); +} + +static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL); + +static struct attribute *isl29003_attributes[] = { + &dev_attr_range.attr, + &dev_attr_resolution.attr, + &dev_attr_mode.attr, + &dev_attr_power_state.attr, + &dev_attr_lux.attr, + NULL +}; + +static const struct attribute_group isl29003_attr_group = { + .attrs = isl29003_attributes, +}; + +static int isl29003_init_client(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int i; + + /* read all the registers once to fill the cache. 
+ * if one of the reads fails, we consider the init failed */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) { + int v = i2c_smbus_read_byte_data(client, i); + if (v < 0) + return -ENODEV; + + data->reg_cache[i] = v; + } + + /* set defaults */ + isl29003_set_range(client, 0); + isl29003_set_resolution(client, 0); + isl29003_set_mode(client, 0); + isl29003_set_power_state(client, 0); + + return 0; +} + +/* + * I2C layer + */ + +static int __devinit isl29003_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct isl29003_data *data; + int err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->client = client; + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + /* initialize the ISL29003 chip */ + err = isl29003_init_client(client); + if (err) + goto exit_kfree; + + /* register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION); + return 0; + +exit_kfree: + kfree(data); + return err; +} + +static int __devexit isl29003_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group); + isl29003_set_power_state(client, 0); + kfree(i2c_get_clientdata(client)); + return 0; +} + +#ifdef CONFIG_PM +static int isl29003_suspend(struct i2c_client *client, pm_message_t mesg) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + + data->power_state_before_suspend = isl29003_get_power_state(client); + return isl29003_set_power_state(client, 0); +} + +static int isl29003_resume(struct i2c_client *client) +{ + int i; + struct isl29003_data *data = i2c_get_clientdata(client); + + /* restore registers from cache */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); 
i++) + if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i])) + return -EIO; + + return isl29003_set_power_state(client, + data->power_state_before_suspend); +} + +#else +#define isl29003_suspend NULL +#define isl29003_resume NULL +#endif /* CONFIG_PM */ + +static const struct i2c_device_id isl29003_id[] = { + { "isl29003", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, isl29003_id); + +static struct i2c_driver isl29003_driver = { + .driver = { + .name = ISL29003_DRV_NAME, + .owner = THIS_MODULE, + }, + .suspend = isl29003_suspend, + .resume = isl29003_resume, + .probe = isl29003_probe, + .remove = __devexit_p(isl29003_remove), + .id_table = isl29003_id, +}; + +static int __init isl29003_init(void) +{ + return i2c_add_driver(&isl29003_driver); +} + +static void __exit isl29003_exit(void) +{ + i2c_del_driver(&isl29003_driver); +} + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("ISL29003 ambient light sensor driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRIVER_VERSION); + +module_init(isl29003_init); +module_exit(isl29003_exit); + diff --git a/drivers/misc/iwmc3200top/Kconfig b/drivers/misc/iwmc3200top/Kconfig new file mode 100644 index 00000000000..9e4b88fb57f --- /dev/null +++ b/drivers/misc/iwmc3200top/Kconfig @@ -0,0 +1,20 @@ +config IWMC3200TOP + tristate "Intel Wireless MultiCom Top Driver" + depends on MMC && EXPERIMENTAL + select FW_LOADER + ---help--- + Intel Wireless MultiCom 3200 Top driver is responsible for + firmware load and enabled coms enumeration + +config IWMC3200TOP_DEBUG + bool "Enable full debug output of iwmc3200top Driver" + depends on IWMC3200TOP + ---help--- + Enable full debug output of iwmc3200top Driver + +config IWMC3200TOP_DEBUGFS + bool "Enable Debugfs debugging interface for iwmc3200top" + depends on IWMC3200TOP + ---help--- + Enable creation of debugfs files for iwmc3200top + diff --git a/drivers/misc/iwmc3200top/Makefile b/drivers/misc/iwmc3200top/Makefile new file mode 100644 index 
00000000000..fbf53fb4634 --- /dev/null +++ b/drivers/misc/iwmc3200top/Makefile @@ -0,0 +1,29 @@ +# iwmc3200top - Intel Wireless MultiCom 3200 Top Driver +# drivers/misc/iwmc3200top/Makefile +# +# Copyright (C) 2009 Intel Corporation. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License version +# 2 as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# +# Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> +# - +# +# + +obj-$(CONFIG_IWMC3200TOP) += iwmc3200top.o +iwmc3200top-objs := main.o fw-download.o +iwmc3200top-$(CONFIG_IWMC3200TOP_DEBUG) += log.o +iwmc3200top-$(CONFIG_IWMC3200TOP_DEBUGFS) += debugfs.o diff --git a/drivers/misc/iwmc3200top/debugfs.c b/drivers/misc/iwmc3200top/debugfs.c new file mode 100644 index 00000000000..0c8ea0a1c8a --- /dev/null +++ b/drivers/misc/iwmc3200top/debugfs.c @@ -0,0 +1,133 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/debufs.c + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/mmc/sdio_func.h> +#include <linux/mmc/sdio.h> +#include <linux/debugfs.h> + +#include "iwmc3200top.h" +#include "fw-msg.h" +#include "log.h" +#include "debugfs.h" + + + +/* Constants definition */ +#define HEXADECIMAL_RADIX 16 + +/* Functions definition */ + + +#define DEBUGFS_ADD(name, parent) do { \ + dbgfs->dbgfs_##parent##_files.file_##name = \ + debugfs_create_file(#name, 0644, dbgfs->dir_##parent, priv, \ + &iwmct_dbgfs_##name##_ops); \ +} while (0) + +#define DEBUGFS_RM(name) do { \ + debugfs_remove(name); \ + name = NULL; \ +} while (0) + +#define DEBUGFS_READ_FUNC(name) \ +ssize_t iwmct_dbgfs_##name##_read(struct file *file, \ + char __user *user_buf, \ + size_t count, loff_t *ppos); + +#define DEBUGFS_WRITE_FUNC(name) \ +ssize_t iwmct_dbgfs_##name##_write(struct file *file, \ + const char __user *user_buf, \ + size_t count, loff_t *ppos); + +#define DEBUGFS_READ_FILE_OPS(name) \ + DEBUGFS_READ_FUNC(name) \ + static const struct file_operations iwmct_dbgfs_##name##_ops = { \ + .read = iwmct_dbgfs_##name##_read, \ + .open = iwmct_dbgfs_open_file_generic, \ + }; + +#define DEBUGFS_WRITE_FILE_OPS(name) \ + DEBUGFS_WRITE_FUNC(name) \ + static const struct file_operations iwmct_dbgfs_##name##_ops = { \ + .write = iwmct_dbgfs_##name##_write, \ + .open = iwmct_dbgfs_open_file_generic, \ + }; + +#define DEBUGFS_READ_WRITE_FILE_OPS(name) \ + DEBUGFS_READ_FUNC(name) \ + DEBUGFS_WRITE_FUNC(name) \ + static const struct file_operations iwmct_dbgfs_##name##_ops = {\ + .write = iwmct_dbgfs_##name##_write, \ + 
.read = iwmct_dbgfs_##name##_read, \ + .open = iwmct_dbgfs_open_file_generic, \ + }; + + +/* Debugfs file ops definitions */ + +/* + * Create the debugfs files and directories + * + */ +void iwmct_dbgfs_register(struct iwmct_priv *priv, const char *name) +{ + struct iwmct_debugfs *dbgfs; + + dbgfs = kzalloc(sizeof(struct iwmct_debugfs), GFP_KERNEL); + if (!dbgfs) { + LOG_ERROR(priv, DEBUGFS, "failed to allocate %zd bytes\n", + sizeof(struct iwmct_debugfs)); + return; + } + + priv->dbgfs = dbgfs; + dbgfs->name = name; + dbgfs->dir_drv = debugfs_create_dir(name, NULL); + if (!dbgfs->dir_drv) { + LOG_ERROR(priv, DEBUGFS, "failed to create debugfs dir\n"); + return; + } + + return; +} + +/** + * Remove the debugfs files and directories + * + */ +void iwmct_dbgfs_unregister(struct iwmct_debugfs *dbgfs) +{ + if (!dbgfs) + return; + + DEBUGFS_RM(dbgfs->dir_drv); + kfree(dbgfs); + dbgfs = NULL; +} + diff --git a/drivers/misc/iwmc3200top/debugfs.h b/drivers/misc/iwmc3200top/debugfs.h new file mode 100644 index 00000000000..71d45759b40 --- /dev/null +++ b/drivers/misc/iwmc3200top/debugfs.h @@ -0,0 +1,58 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/debufs.h + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#ifndef __DEBUGFS_H__ +#define __DEBUGFS_H__ + + +#ifdef CONFIG_IWMC3200TOP_DEBUGFS + +struct iwmct_debugfs { + const char *name; + struct dentry *dir_drv; + struct dir_drv_files { + } dbgfs_drv_files; +}; + +void iwmct_dbgfs_register(struct iwmct_priv *priv, const char *name); +void iwmct_dbgfs_unregister(struct iwmct_debugfs *dbgfs); + +#else /* CONFIG_IWMC3200TOP_DEBUGFS */ + +struct iwmct_debugfs; + +static inline void +iwmct_dbgfs_register(struct iwmct_priv *priv, const char *name) +{} + +static inline void +iwmct_dbgfs_unregister(struct iwmct_debugfs *dbgfs) +{} + +#endif /* CONFIG_IWMC3200TOP_DEBUGFS */ + +#endif /* __DEBUGFS_H__ */ + diff --git a/drivers/misc/iwmc3200top/fw-download.c b/drivers/misc/iwmc3200top/fw-download.c new file mode 100644 index 00000000000..50d431e469f --- /dev/null +++ b/drivers/misc/iwmc3200top/fw-download.c @@ -0,0 +1,355 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/fw-download.c + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#include <linux/firmware.h> +#include <linux/mmc/sdio_func.h> +#include <asm/unaligned.h> + +#include "iwmc3200top.h" +#include "log.h" +#include "fw-msg.h" + +#define CHECKSUM_BYTES_NUM sizeof(u32) + +/** + init parser struct with file + */ +static int iwmct_fw_parser_init(struct iwmct_priv *priv, const u8 *file, + size_t file_size, size_t block_size) +{ + struct iwmct_parser *parser = &priv->parser; + struct iwmct_fw_hdr *fw_hdr = &parser->versions; + + LOG_INFOEX(priv, INIT, "-->\n"); + + LOG_INFO(priv, FW_DOWNLOAD, "file_size=%zd\n", file_size); + + parser->file = file; + parser->file_size = file_size; + parser->cur_pos = 0; + parser->buf = NULL; + + parser->buf = kzalloc(block_size, GFP_KERNEL); + if (!parser->buf) { + LOG_ERROR(priv, FW_DOWNLOAD, "kzalloc error\n"); + return -ENOMEM; + } + parser->buf_size = block_size; + + /* extract fw versions */ + memcpy(fw_hdr, parser->file, sizeof(struct iwmct_fw_hdr)); + LOG_INFO(priv, FW_DOWNLOAD, "fw versions are:\n" + "top %u.%u.%u gps %u.%u.%u bt %u.%u.%u tic %s\n", + fw_hdr->top_major, fw_hdr->top_minor, fw_hdr->top_revision, + fw_hdr->gps_major, fw_hdr->gps_minor, fw_hdr->gps_revision, + fw_hdr->bt_major, fw_hdr->bt_minor, fw_hdr->bt_revision, + fw_hdr->tic_name); + + parser->cur_pos += sizeof(struct iwmct_fw_hdr); + + LOG_INFOEX(priv, INIT, "<--\n"); + return 0; +} + +static bool iwmct_checksum(struct iwmct_priv *priv) +{ + struct iwmct_parser *parser = &priv->parser; + __le32 *file = (__le32 *)parser->file; + int i, pad, steps; + u32 accum = 0; + u32 checksum; + u32 mask = 0xffffffff; + + pad = (parser->file_size - CHECKSUM_BYTES_NUM) % 4; + steps = (parser->file_size - CHECKSUM_BYTES_NUM) / 4; + + LOG_INFO(priv, FW_DOWNLOAD, "pad=%d steps=%d\n", pad, steps); + + for (i = 0; i < steps; i++) + accum += le32_to_cpu(file[i]); + + if (pad) { + mask <<= 8 * (4 - pad); + accum += le32_to_cpu(file[steps]) & mask; + } + + checksum = 
get_unaligned_le32((__le32 *)(parser->file + + parser->file_size - CHECKSUM_BYTES_NUM)); + + LOG_INFO(priv, FW_DOWNLOAD, + "compare checksum accum=0x%x to checksum=0x%x\n", + accum, checksum); + + return checksum == accum; +} + +static int iwmct_parse_next_section(struct iwmct_priv *priv, const u8 **p_sec, + size_t *sec_size, __le32 *sec_addr) +{ + struct iwmct_parser *parser = &priv->parser; + struct iwmct_dbg *dbg = &priv->dbg; + struct iwmct_fw_sec_hdr *sec_hdr; + + LOG_INFOEX(priv, INIT, "-->\n"); + + while (parser->cur_pos + sizeof(struct iwmct_fw_sec_hdr) + <= parser->file_size) { + + sec_hdr = (struct iwmct_fw_sec_hdr *) + (parser->file + parser->cur_pos); + parser->cur_pos += sizeof(struct iwmct_fw_sec_hdr); + + LOG_INFO(priv, FW_DOWNLOAD, + "sec hdr: type=%s addr=0x%x size=%d\n", + sec_hdr->type, sec_hdr->target_addr, + sec_hdr->data_size); + + if (strcmp(sec_hdr->type, "ENT") == 0) + parser->entry_point = le32_to_cpu(sec_hdr->target_addr); + else if (strcmp(sec_hdr->type, "LBL") == 0) + strcpy(dbg->label_fw, parser->file + parser->cur_pos); + else if (((strcmp(sec_hdr->type, "TOP") == 0) && + (priv->barker & BARKER_DNLOAD_TOP_MSK)) || + ((strcmp(sec_hdr->type, "GPS") == 0) && + (priv->barker & BARKER_DNLOAD_GPS_MSK)) || + ((strcmp(sec_hdr->type, "BTH") == 0) && + (priv->barker & BARKER_DNLOAD_BT_MSK))) { + *sec_addr = sec_hdr->target_addr; + *sec_size = le32_to_cpu(sec_hdr->data_size); + *p_sec = parser->file + parser->cur_pos; + parser->cur_pos += le32_to_cpu(sec_hdr->data_size); + return 1; + } else if (strcmp(sec_hdr->type, "LOG") != 0) + LOG_WARNING(priv, FW_DOWNLOAD, + "skipping section type %s\n", + sec_hdr->type); + + parser->cur_pos += le32_to_cpu(sec_hdr->data_size); + LOG_INFO(priv, FW_DOWNLOAD, + "finished with section cur_pos=%zd\n", parser->cur_pos); + } + + LOG_INFOEX(priv, INIT, "<--\n"); + return 0; +} + +static int iwmct_download_section(struct iwmct_priv *priv, const u8 *p_sec, + size_t sec_size, __le32 addr) +{ + struct iwmct_parser 
*parser = &priv->parser; + struct iwmct_fw_load_hdr *hdr = (struct iwmct_fw_load_hdr *)parser->buf; + const u8 *cur_block = p_sec; + size_t sent = 0; + int cnt = 0; + int ret = 0; + u32 cmd = 0; + + LOG_INFOEX(priv, INIT, "-->\n"); + LOG_INFO(priv, FW_DOWNLOAD, "Download address 0x%x size 0x%zx\n", + addr, sec_size); + + while (sent < sec_size) { + int i; + u32 chksm = 0; + u32 reset = atomic_read(&priv->reset); + /* actual FW data */ + u32 data_size = min(parser->buf_size - sizeof(*hdr), + sec_size - sent); + /* Pad to block size */ + u32 trans_size = (data_size + sizeof(*hdr) + + IWMC_SDIO_BLK_SIZE - 1) & + ~(IWMC_SDIO_BLK_SIZE - 1); + ++cnt; + + /* in case of reset, interrupt FW DOWNLAOD */ + if (reset) { + LOG_INFO(priv, FW_DOWNLOAD, + "Reset detected. Abort FW download!!!"); + ret = -ECANCELED; + goto exit; + } + + memset(parser->buf, 0, parser->buf_size); + cmd |= IWMC_OPCODE_WRITE << CMD_HDR_OPCODE_POS; + cmd |= IWMC_CMD_SIGNATURE << CMD_HDR_SIGNATURE_POS; + cmd |= (priv->dbg.direct ? 1 : 0) << CMD_HDR_DIRECT_ACCESS_POS; + cmd |= (priv->dbg.checksum ? 
1 : 0) << CMD_HDR_USE_CHECKSUM_POS; + hdr->data_size = cpu_to_le32(data_size); + hdr->target_addr = addr; + + /* checksum is allowed for sizes divisible by 4 */ + if (data_size & 0x3) + cmd &= ~CMD_HDR_USE_CHECKSUM_MSK; + + memcpy(hdr->data, cur_block, data_size); + + + if (cmd & CMD_HDR_USE_CHECKSUM_MSK) { + + chksm = data_size + le32_to_cpu(addr) + cmd; + for (i = 0; i < data_size >> 2; i++) + chksm += ((u32 *)cur_block)[i]; + + hdr->block_chksm = cpu_to_le32(chksm); + LOG_INFO(priv, FW_DOWNLOAD, "Checksum = 0x%X\n", + hdr->block_chksm); + } + + LOG_INFO(priv, FW_DOWNLOAD, "trans#%d, len=%d, sent=%zd, " + "sec_size=%zd, startAddress 0x%X\n", + cnt, trans_size, sent, sec_size, addr); + + if (priv->dbg.dump) + LOG_HEXDUMP(FW_DOWNLOAD, parser->buf, trans_size); + + + hdr->cmd = cpu_to_le32(cmd); + /* send it down */ + /* TODO: add more proper sending and error checking */ + ret = iwmct_tx(priv, 0, parser->buf, trans_size); + if (ret != 0) { + LOG_INFO(priv, FW_DOWNLOAD, + "iwmct_tx returned %d\n", ret); + goto exit; + } + + addr = cpu_to_le32(le32_to_cpu(addr) + data_size); + sent += data_size; + cur_block = p_sec + sent; + + if (priv->dbg.blocks && (cnt + 1) >= priv->dbg.blocks) { + LOG_INFO(priv, FW_DOWNLOAD, + "Block number limit is reached [%d]\n", + priv->dbg.blocks); + break; + } + } + + if (sent < sec_size) + ret = -EINVAL; +exit: + LOG_INFOEX(priv, INIT, "<--\n"); + return ret; +} + +static int iwmct_kick_fw(struct iwmct_priv *priv, bool jump) +{ + struct iwmct_parser *parser = &priv->parser; + struct iwmct_fw_load_hdr *hdr = (struct iwmct_fw_load_hdr *)parser->buf; + int ret; + u32 cmd; + + LOG_INFOEX(priv, INIT, "-->\n"); + + memset(parser->buf, 0, parser->buf_size); + cmd = IWMC_CMD_SIGNATURE << CMD_HDR_SIGNATURE_POS; + if (jump) { + cmd |= IWMC_OPCODE_JUMP << CMD_HDR_OPCODE_POS; + hdr->target_addr = cpu_to_le32(parser->entry_point); + LOG_INFO(priv, FW_DOWNLOAD, "jump address 0x%x\n", + parser->entry_point); + } else { + cmd |= IWMC_OPCODE_LAST_COMMAND 
<< CMD_HDR_OPCODE_POS; + LOG_INFO(priv, FW_DOWNLOAD, "last command\n"); + } + + hdr->cmd = cpu_to_le32(cmd); + + LOG_HEXDUMP(FW_DOWNLOAD, parser->buf, sizeof(*hdr)); + /* send it down */ + /* TODO: add more proper sending and error checking */ + ret = iwmct_tx(priv, 0, parser->buf, IWMC_SDIO_BLK_SIZE); + if (ret) + LOG_INFO(priv, FW_DOWNLOAD, "iwmct_tx returned %d", ret); + + LOG_INFOEX(priv, INIT, "<--\n"); + return 0; +} + +int iwmct_fw_load(struct iwmct_priv *priv) +{ + const u8 *fw_name = FW_NAME(FW_API_VER); + const struct firmware *raw; + const u8 *pdata; + size_t len; + __le32 addr; + int ret; + + /* clear parser struct */ + memset(&priv->parser, 0, sizeof(struct iwmct_parser)); + + /* get the firmware */ + ret = request_firmware(&raw, fw_name, &priv->func->dev); + if (ret < 0) { + LOG_ERROR(priv, FW_DOWNLOAD, "%s request_firmware failed %d\n", + fw_name, ret); + goto exit; + } + + if (raw->size < sizeof(struct iwmct_fw_sec_hdr)) { + LOG_ERROR(priv, FW_DOWNLOAD, "%s smaller then (%zd) (%zd)\n", + fw_name, sizeof(struct iwmct_fw_sec_hdr), raw->size); + goto exit; + } + + LOG_INFO(priv, FW_DOWNLOAD, "Read firmware '%s'\n", fw_name); + + ret = iwmct_fw_parser_init(priv, raw->data, raw->size, priv->trans_len); + if (ret < 0) { + LOG_ERROR(priv, FW_DOWNLOAD, + "iwmct_parser_init failed: Reason %d\n", ret); + goto exit; + } + + /* checksum */ + if (!iwmct_checksum(priv)) { + LOG_ERROR(priv, FW_DOWNLOAD, "checksum error\n"); + ret = -EINVAL; + goto exit; + } + + /* download firmware to device */ + while (iwmct_parse_next_section(priv, &pdata, &len, &addr)) { + if (iwmct_download_section(priv, pdata, len, addr)) { + LOG_ERROR(priv, FW_DOWNLOAD, + "%s download section failed\n", fw_name); + ret = -EIO; + goto exit; + } + } + + iwmct_kick_fw(priv, !!(priv->barker & BARKER_DNLOAD_JUMP_MSK)); + +exit: + kfree(priv->parser.buf); + + if (raw) + release_firmware(raw); + + raw = NULL; + + return ret; +} diff --git a/drivers/misc/iwmc3200top/fw-msg.h 
b/drivers/misc/iwmc3200top/fw-msg.h new file mode 100644 index 00000000000..9e26b75bd48 --- /dev/null +++ b/drivers/misc/iwmc3200top/fw-msg.h @@ -0,0 +1,113 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/fw-msg.h + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#ifndef __FWMSG_H__ +#define __FWMSG_H__ + +#define COMM_TYPE_D2H 0xFF +#define COMM_TYPE_H2D 0xEE + +#define COMM_CATEGORY_OPERATIONAL 0x00 +#define COMM_CATEGORY_DEBUG 0x01 +#define COMM_CATEGORY_TESTABILITY 0x02 +#define COMM_CATEGORY_DIAGNOSTICS 0x03 + +#define OP_DBG_ZSTR_MSG cpu_to_le16(0x1A) + +#define FW_LOG_SRC_MAX 32 +#define FW_LOG_SRC_ALL 255 + +#define FW_STRING_TABLE_ADDR cpu_to_le32(0x0C000000) + +#define CMD_DBG_LOG_LEVEL cpu_to_le16(0x0001) +#define CMD_TST_DEV_RESET cpu_to_le16(0x0060) +#define CMD_TST_FUNC_RESET cpu_to_le16(0x0062) +#define CMD_TST_IFACE_RESET cpu_to_le16(0x0064) +#define CMD_TST_CPU_UTILIZATION cpu_to_le16(0x0065) +#define CMD_TST_TOP_DEEP_SLEEP cpu_to_le16(0x0080) +#define CMD_TST_WAKEUP cpu_to_le16(0x0081) +#define CMD_TST_FUNC_WAKEUP cpu_to_le16(0x0082) +#define CMD_TST_FUNC_DEEP_SLEEP_REQUEST cpu_to_le16(0x0083) +#define CMD_TST_GET_MEM_DUMP 
cpu_to_le16(0x0096) + +#define OP_OPR_ALIVE cpu_to_le16(0x0010) +#define OP_OPR_CMD_ACK cpu_to_le16(0x001F) +#define OP_OPR_CMD_NACK cpu_to_le16(0x0020) +#define OP_TST_MEM_DUMP cpu_to_le16(0x0043) + +#define CMD_FLAG_PADDING_256 0x80 + +#define FW_HCMD_BLOCK_SIZE 256 + +struct msg_hdr { + u8 type; + u8 category; + __le16 opcode; + u8 seqnum; + u8 flags; + __le16 length; +} __attribute__((__packed__)); + +struct log_hdr { + __le32 timestamp; + u8 severity; + u8 logsource; + __le16 reserved; +} __attribute__((__packed__)); + +struct mdump_hdr { + u8 dmpid; + u8 frag; + __le16 size; + __le32 addr; +} __attribute__((__packed__)); + +struct top_msg { + struct msg_hdr hdr; + union { + /* D2H messages */ + struct { + struct log_hdr log_hdr; + u8 data[1]; + } __attribute__((__packed__)) log; + + struct { + struct log_hdr log_hdr; + struct mdump_hdr md_hdr; + u8 data[1]; + } __attribute__((__packed__)) mdump; + + /* H2D messages */ + struct { + u8 logsource; + u8 sevmask; + } __attribute__((__packed__)) logdefs[FW_LOG_SRC_MAX]; + struct mdump_hdr mdump_req; + } u; +} __attribute__((__packed__)); + + +#endif /* __FWMSG_H__ */ diff --git a/drivers/misc/iwmc3200top/iwmc3200top.h b/drivers/misc/iwmc3200top/iwmc3200top.h new file mode 100644 index 00000000000..43bd510e187 --- /dev/null +++ b/drivers/misc/iwmc3200top/iwmc3200top.h @@ -0,0 +1,209 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/iwmc3200top.h + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#ifndef __IWMC3200TOP_H__ +#define __IWMC3200TOP_H__ + +#include <linux/workqueue.h> + +#define DRV_NAME "iwmc3200top" +#define FW_API_VER 1 +#define _FW_NAME(api) DRV_NAME "." #api ".fw" +#define FW_NAME(api) _FW_NAME(api) + +#define IWMC_SDIO_BLK_SIZE 256 +#define IWMC_DEFAULT_TR_BLK 64 +#define IWMC_SDIO_DATA_ADDR 0x0 +#define IWMC_SDIO_INTR_ENABLE_ADDR 0x14 +#define IWMC_SDIO_INTR_STATUS_ADDR 0x13 +#define IWMC_SDIO_INTR_CLEAR_ADDR 0x13 +#define IWMC_SDIO_INTR_GET_SIZE_ADDR 0x2C + +#define COMM_HUB_HEADER_LENGTH 16 +#define LOGGER_HEADER_LENGTH 10 + + +#define BARKER_DNLOAD_BT_POS 0 +#define BARKER_DNLOAD_BT_MSK BIT(BARKER_DNLOAD_BT_POS) +#define BARKER_DNLOAD_GPS_POS 1 +#define BARKER_DNLOAD_GPS_MSK BIT(BARKER_DNLOAD_GPS_POS) +#define BARKER_DNLOAD_TOP_POS 2 +#define BARKER_DNLOAD_TOP_MSK BIT(BARKER_DNLOAD_TOP_POS) +#define BARKER_DNLOAD_RESERVED1_POS 3 +#define BARKER_DNLOAD_RESERVED1_MSK BIT(BARKER_DNLOAD_RESERVED1_POS) +#define BARKER_DNLOAD_JUMP_POS 4 +#define BARKER_DNLOAD_JUMP_MSK BIT(BARKER_DNLOAD_JUMP_POS) +#define BARKER_DNLOAD_SYNC_POS 5 +#define BARKER_DNLOAD_SYNC_MSK BIT(BARKER_DNLOAD_SYNC_POS) +#define BARKER_DNLOAD_RESERVED2_POS 6 +#define BARKER_DNLOAD_RESERVED2_MSK (0x3 << BARKER_DNLOAD_RESERVED2_POS) +#define BARKER_DNLOAD_BARKER_POS 8 +#define BARKER_DNLOAD_BARKER_MSK (0xffffff << BARKER_DNLOAD_BARKER_POS) + +#define IWMC_BARKER_REBOOT (0xdeadbe << BARKER_DNLOAD_BARKER_POS) +/* whole field barker */ +#define IWMC_BARKER_ACK 0xfeedbabe + +#define IWMC_CMD_SIGNATURE 0xcbbc + +#define CMD_HDR_OPCODE_POS 0 +#define CMD_HDR_OPCODE_MSK_MSK (0xf << CMD_HDR_OPCODE_MSK_POS) +#define CMD_HDR_RESPONSE_CODE_POS 4 +#define CMD_HDR_RESPONSE_CODE_MSK 
(0xf << CMD_HDR_RESPONSE_CODE_POS) +#define CMD_HDR_USE_CHECKSUM_POS 8 +#define CMD_HDR_USE_CHECKSUM_MSK BIT(CMD_HDR_USE_CHECKSUM_POS) +#define CMD_HDR_RESPONSE_REQUIRED_POS 9 +#define CMD_HDR_RESPONSE_REQUIRED_MSK BIT(CMD_HDR_RESPONSE_REQUIRED_POS) +#define CMD_HDR_DIRECT_ACCESS_POS 10 +#define CMD_HDR_DIRECT_ACCESS_MSK BIT(CMD_HDR_DIRECT_ACCESS_POS) +#define CMD_HDR_RESERVED_POS 11 +#define CMD_HDR_RESERVED_MSK BIT(0x1f << CMD_HDR_RESERVED_POS) +#define CMD_HDR_SIGNATURE_POS 16 +#define CMD_HDR_SIGNATURE_MSK BIT(0xffff << CMD_HDR_SIGNATURE_POS) + +enum { + IWMC_OPCODE_PING = 0, + IWMC_OPCODE_READ = 1, + IWMC_OPCODE_WRITE = 2, + IWMC_OPCODE_JUMP = 3, + IWMC_OPCODE_REBOOT = 4, + IWMC_OPCODE_PERSISTENT_WRITE = 5, + IWMC_OPCODE_PERSISTENT_READ = 6, + IWMC_OPCODE_READ_MODIFY_WRITE = 7, + IWMC_OPCODE_LAST_COMMAND = 15 +}; + +struct iwmct_fw_load_hdr { + __le32 cmd; + __le32 target_addr; + __le32 data_size; + __le32 block_chksm; + u8 data[0]; +}; + +/** + * struct iwmct_fw_hdr + * holds all sw components versions + */ +struct iwmct_fw_hdr { + u8 top_major; + u8 top_minor; + u8 top_revision; + u8 gps_major; + u8 gps_minor; + u8 gps_revision; + u8 bt_major; + u8 bt_minor; + u8 bt_revision; + u8 tic_name[31]; +}; + +/** + * struct iwmct_fw_sec_hdr + * @type: function type + * @data_size: section's data size + * @target_addr: download address + */ +struct iwmct_fw_sec_hdr { + u8 type[4]; + __le32 data_size; + __le32 target_addr; +}; + +/** + * struct iwmct_parser + * @file: fw image + * @file_size: fw size + * @cur_pos: position in file + * @buf: temp buf for download + * @buf_size: size of buf + * @entry_point: address to jump in fw kick-off + */ +struct iwmct_parser { + const u8 *file; + size_t file_size; + size_t cur_pos; + u8 *buf; + size_t buf_size; + u32 entry_point; + struct iwmct_fw_hdr versions; +}; + + +struct iwmct_work_struct { + struct list_head list; + ssize_t iosize; +}; + +struct iwmct_dbg { + int blocks; + bool dump; + bool jump; + bool direct; + bool 
checksum; + bool fw_download; + int block_size; + int download_trans_blks; + + char label_fw[256]; +}; + +struct iwmct_debugfs; + +struct iwmct_priv { + struct sdio_func *func; + struct iwmct_debugfs *dbgfs; + struct iwmct_parser parser; + atomic_t reset; + atomic_t dev_sync; + u32 trans_len; + u32 barker; + struct iwmct_dbg dbg; + + /* drivers work queue */ + struct workqueue_struct *wq; + struct workqueue_struct *bus_rescan_wq; + struct work_struct bus_rescan_worker; + struct work_struct isr_worker; + + /* drivers wait queue */ + wait_queue_head_t wait_q; + + /* rx request list */ + struct list_head read_req_list; +}; + +extern int iwmct_tx(struct iwmct_priv *priv, unsigned int addr, + void *src, int count); + +extern int iwmct_fw_load(struct iwmct_priv *priv); + +extern void iwmct_dbg_init_params(struct iwmct_priv *drv); +extern void iwmct_dbg_init_drv_attrs(struct device_driver *drv); +extern void iwmct_dbg_remove_drv_attrs(struct device_driver *drv); +extern int iwmct_send_hcmd(struct iwmct_priv *priv, u8 *cmd, u16 len); + +#endif /* __IWMC3200TOP_H__ */ diff --git a/drivers/misc/iwmc3200top/log.c b/drivers/misc/iwmc3200top/log.c new file mode 100644 index 00000000000..d569279698f --- /dev/null +++ b/drivers/misc/iwmc3200top/log.c @@ -0,0 +1,347 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/log.c + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#include <linux/kernel.h> +#include <linux/mmc/sdio_func.h> +#include <linux/ctype.h> +#include "fw-msg.h" +#include "iwmc3200top.h" +#include "log.h" + +/* Maximal hexadecimal string size of the FW memdump message */ +#define LOG_MSG_SIZE_MAX 12400 + +/* iwmct_logdefs is a global used by log macros */ +u8 iwmct_logdefs[LOG_SRC_MAX]; +static u8 iwmct_fw_logdefs[FW_LOG_SRC_MAX]; + + +static int _log_set_log_filter(u8 *logdefs, int size, u8 src, u8 logmask) +{ + int i; + + if (src < size) + logdefs[src] = logmask; + else if (src == LOG_SRC_ALL) + for (i = 0; i < size; i++) + logdefs[i] = logmask; + else + return -1; + + return 0; +} + + +int iwmct_log_set_filter(u8 src, u8 logmask) +{ + return _log_set_log_filter(iwmct_logdefs, LOG_SRC_MAX, src, logmask); +} + + +int iwmct_log_set_fw_filter(u8 src, u8 logmask) +{ + return _log_set_log_filter(iwmct_fw_logdefs, + FW_LOG_SRC_MAX, src, logmask); +} + + +static int log_msg_format_hex(char *str, int slen, u8 *ibuf, + int ilen, char *pref) +{ + int pos = 0; + int i; + int len; + + for (pos = 0, i = 0; pos < slen - 2 && pref[i] != '\0'; i++, pos++) + str[pos] = pref[i]; + + for (i = 0; pos < slen - 2 && i < ilen; pos += len, i++) + len = snprintf(&str[pos], slen - pos - 1, " %2.2X", ibuf[i]); + + if (i < ilen) + return -1; + + return 0; +} + +/* NOTE: This function is not thread safe. 
+ Currently it's called only from sdio rx worker - no race there +*/ +void iwmct_log_top_message(struct iwmct_priv *priv, u8 *buf, int len) +{ + struct top_msg *msg; + static char logbuf[LOG_MSG_SIZE_MAX]; + + msg = (struct top_msg *)buf; + + if (len < sizeof(msg->hdr) + sizeof(msg->u.log.log_hdr)) { + LOG_ERROR(priv, FW_MSG, "Log message from TOP " + "is too short %d (expected %zd)\n", + len, sizeof(msg->hdr) + sizeof(msg->u.log.log_hdr)); + return; + } + + if (!(iwmct_fw_logdefs[msg->u.log.log_hdr.logsource] & + BIT(msg->u.log.log_hdr.severity)) || + !(iwmct_logdefs[LOG_SRC_FW_MSG] & BIT(msg->u.log.log_hdr.severity))) + return; + + switch (msg->hdr.category) { + case COMM_CATEGORY_TESTABILITY: + if (!(iwmct_logdefs[LOG_SRC_TST] & + BIT(msg->u.log.log_hdr.severity))) + return; + if (log_msg_format_hex(logbuf, LOG_MSG_SIZE_MAX, buf, + le16_to_cpu(msg->hdr.length) + + sizeof(msg->hdr), "<TST>")) + LOG_WARNING(priv, TST, + "TOP TST message is too long, truncating..."); + LOG_WARNING(priv, TST, "%s\n", logbuf); + break; + case COMM_CATEGORY_DEBUG: + if (msg->hdr.opcode == OP_DBG_ZSTR_MSG) + LOG_INFO(priv, FW_MSG, "%s %s", "<DBG>", + ((u8 *)msg) + sizeof(msg->hdr) + + sizeof(msg->u.log.log_hdr)); + else { + if (log_msg_format_hex(logbuf, LOG_MSG_SIZE_MAX, buf, + le16_to_cpu(msg->hdr.length) + + sizeof(msg->hdr), + "<DBG>")) + LOG_WARNING(priv, FW_MSG, + "TOP DBG message is too long," + "truncating..."); + LOG_WARNING(priv, FW_MSG, "%s\n", logbuf); + } + break; + default: + break; + } +} + +static int _log_get_filter_str(u8 *logdefs, int logdefsz, char *buf, int size) +{ + int i, pos, len; + for (i = 0, pos = 0; (pos < size-1) && (i < logdefsz); i++) { + len = snprintf(&buf[pos], size - pos - 1, "0x%02X%02X,", + i, logdefs[i]); + pos += len; + } + buf[pos-1] = '\n'; + buf[pos] = '\0'; + + if (i < logdefsz) + return -1; + return 0; +} + +int log_get_filter_str(char *buf, int size) +{ + return _log_get_filter_str(iwmct_logdefs, LOG_SRC_MAX, buf, size); +} + +int 
log_get_fw_filter_str(char *buf, int size) +{ + return _log_get_filter_str(iwmct_fw_logdefs, FW_LOG_SRC_MAX, buf, size); +} + +#define HEXADECIMAL_RADIX 16 +#define LOG_SRC_FORMAT 7 /* log level is in format of "0xXXXX," */ + +ssize_t show_iwmct_log_level(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct iwmct_priv *priv = dev_get_drvdata(d); + char *str_buf; + int buf_size; + ssize_t ret; + + buf_size = (LOG_SRC_FORMAT * LOG_SRC_MAX) + 1; + str_buf = kzalloc(buf_size, GFP_KERNEL); + if (!str_buf) { + LOG_ERROR(priv, DEBUGFS, + "failed to allocate %d bytes\n", buf_size); + ret = -ENOMEM; + goto exit; + } + + if (log_get_filter_str(str_buf, buf_size) < 0) { + ret = -EINVAL; + goto exit; + } + + ret = sprintf(buf, "%s", str_buf); + +exit: + kfree(str_buf); + return ret; +} + +ssize_t store_iwmct_log_level(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct iwmct_priv *priv = dev_get_drvdata(d); + char *token, *str_buf = NULL; + long val; + ssize_t ret = count; + u8 src, mask; + + if (!count) + goto exit; + + str_buf = kzalloc(count, GFP_KERNEL); + if (!str_buf) { + LOG_ERROR(priv, DEBUGFS, + "failed to allocate %zd bytes\n", count); + ret = -ENOMEM; + goto exit; + } + + memcpy(str_buf, buf, count); + + while ((token = strsep(&str_buf, ",")) != NULL) { + while (isspace(*token)) + ++token; + if (strict_strtol(token, HEXADECIMAL_RADIX, &val)) { + LOG_ERROR(priv, DEBUGFS, + "failed to convert string to long %s\n", + token); + ret = -EINVAL; + goto exit; + } + + mask = val & 0xFF; + src = (val & 0XFF00) >> 8; + iwmct_log_set_filter(src, mask); + } + +exit: + kfree(str_buf); + return ret; +} + +ssize_t show_iwmct_log_level_fw(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct iwmct_priv *priv = dev_get_drvdata(d); + char *str_buf; + int buf_size; + ssize_t ret; + + buf_size = (LOG_SRC_FORMAT * FW_LOG_SRC_MAX) + 2; + + str_buf = kzalloc(buf_size, GFP_KERNEL); + if (!str_buf) { + 
LOG_ERROR(priv, DEBUGFS, + "failed to allocate %d bytes\n", buf_size); + ret = -ENOMEM; + goto exit; + } + + if (log_get_fw_filter_str(str_buf, buf_size) < 0) { + ret = -EINVAL; + goto exit; + } + + ret = sprintf(buf, "%s", str_buf); + +exit: + kfree(str_buf); + return ret; +} + +ssize_t store_iwmct_log_level_fw(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct iwmct_priv *priv = dev_get_drvdata(d); + struct top_msg cmd; + char *token, *str_buf = NULL; + ssize_t ret = count; + u16 cmdlen = 0; + int i; + long val; + u8 src, mask; + + if (!count) + goto exit; + + str_buf = kzalloc(count, GFP_KERNEL); + if (!str_buf) { + LOG_ERROR(priv, DEBUGFS, + "failed to allocate %zd bytes\n", count); + ret = -ENOMEM; + goto exit; + } + + memcpy(str_buf, buf, count); + + cmd.hdr.type = COMM_TYPE_H2D; + cmd.hdr.category = COMM_CATEGORY_DEBUG; + cmd.hdr.opcode = CMD_DBG_LOG_LEVEL; + + for (i = 0; ((token = strsep(&str_buf, ",")) != NULL) && + (i < FW_LOG_SRC_MAX); i++) { + + while (isspace(*token)) + ++token; + + if (strict_strtol(token, HEXADECIMAL_RADIX, &val)) { + LOG_ERROR(priv, DEBUGFS, + "failed to convert string to long %s\n", + token); + ret = -EINVAL; + goto exit; + } + + mask = val & 0xFF; /* LSB */ + src = (val & 0XFF00) >> 8; /* 2nd least significant byte. 
*/ + iwmct_log_set_fw_filter(src, mask); + + cmd.u.logdefs[i].logsource = src; + cmd.u.logdefs[i].sevmask = mask; + } + + cmd.hdr.length = cpu_to_le16(i * sizeof(cmd.u.logdefs[0])); + cmdlen = (i * sizeof(cmd.u.logdefs[0]) + sizeof(cmd.hdr)); + + ret = iwmct_send_hcmd(priv, (u8 *)&cmd, cmdlen); + if (ret) { + LOG_ERROR(priv, DEBUGFS, + "Failed to send %d bytes of fwcmd, ret=%zd\n", + cmdlen, ret); + goto exit; + } else + LOG_INFO(priv, DEBUGFS, "fwcmd sent (%d bytes)\n", cmdlen); + + ret = count; + +exit: + kfree(str_buf); + return ret; +} + diff --git a/drivers/misc/iwmc3200top/log.h b/drivers/misc/iwmc3200top/log.h new file mode 100644 index 00000000000..aba8121f978 --- /dev/null +++ b/drivers/misc/iwmc3200top/log.h @@ -0,0 +1,158 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/log.h + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#ifndef __LOG_H__ +#define __LOG_H__ + + +/* log severity: + * The log levels here match FW log levels + * so values need to stay as is */ +#define LOG_SEV_CRITICAL 0 +#define LOG_SEV_ERROR 1 +#define LOG_SEV_WARNING 2 +#define LOG_SEV_INFO 3 +#define LOG_SEV_INFOEX 4 + +#define LOG_SEV_FILTER_ALL \ + (BIT(LOG_SEV_CRITICAL) | \ + BIT(LOG_SEV_ERROR) | \ + BIT(LOG_SEV_WARNING) | \ + BIT(LOG_SEV_INFO) | \ + BIT(LOG_SEV_INFOEX)) + +/* log source */ +#define LOG_SRC_INIT 0 +#define LOG_SRC_DEBUGFS 1 +#define LOG_SRC_FW_DOWNLOAD 2 +#define LOG_SRC_FW_MSG 3 +#define LOG_SRC_TST 4 +#define LOG_SRC_IRQ 5 + +#define LOG_SRC_MAX 6 +#define LOG_SRC_ALL 0xFF + +/** + * Default intitialization runtime log level + */ +#ifndef LOG_SEV_FILTER_RUNTIME +#define LOG_SEV_FILTER_RUNTIME \ + (BIT(LOG_SEV_CRITICAL) | \ + BIT(LOG_SEV_ERROR) | \ + BIT(LOG_SEV_WARNING)) +#endif + +#ifndef FW_LOG_SEV_FILTER_RUNTIME +#define FW_LOG_SEV_FILTER_RUNTIME LOG_SEV_FILTER_ALL +#endif + +#ifdef CONFIG_IWMC3200TOP_DEBUG +/** + * Log macros + */ + +#define priv2dev(priv) (&(priv->func)->dev) + +#define LOG_CRITICAL(priv, src, fmt, args...) \ +do { \ + if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_CRITICAL)) \ + dev_crit(priv2dev(priv), "%s %d: " fmt, \ + __func__, __LINE__, ##args); \ +} while (0) + +#define LOG_ERROR(priv, src, fmt, args...) \ +do { \ + if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_ERROR)) \ + dev_err(priv2dev(priv), "%s %d: " fmt, \ + __func__, __LINE__, ##args); \ +} while (0) + +#define LOG_WARNING(priv, src, fmt, args...) \ +do { \ + if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_WARNING)) \ + dev_warn(priv2dev(priv), "%s %d: " fmt, \ + __func__, __LINE__, ##args); \ +} while (0) + +#define LOG_INFO(priv, src, fmt, args...) 
\ +do { \ + if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_INFO)) \ + dev_info(priv2dev(priv), "%s %d: " fmt, \ + __func__, __LINE__, ##args); \ +} while (0) + +#define LOG_INFOEX(priv, src, fmt, args...) \ +do { \ + if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_INFOEX)) \ + dev_dbg(priv2dev(priv), "%s %d: " fmt, \ + __func__, __LINE__, ##args); \ +} while (0) + +#define LOG_HEXDUMP(src, ptr, len) \ +do { \ + if (iwmct_logdefs[LOG_SRC_ ## src] & BIT(LOG_SEV_INFOEX)) \ + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, \ + 16, 1, ptr, len, false); \ +} while (0) + +void iwmct_log_top_message(struct iwmct_priv *priv, u8 *buf, int len); + +extern u8 iwmct_logdefs[]; + +int iwmct_log_set_filter(u8 src, u8 logmask); +int iwmct_log_set_fw_filter(u8 src, u8 logmask); + +ssize_t show_iwmct_log_level(struct device *d, + struct device_attribute *attr, char *buf); +ssize_t store_iwmct_log_level(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count); +ssize_t show_iwmct_log_level_fw(struct device *d, + struct device_attribute *attr, char *buf); +ssize_t store_iwmct_log_level_fw(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count); + +#else + +#define LOG_CRITICAL(priv, src, fmt, args...) +#define LOG_ERROR(priv, src, fmt, args...) +#define LOG_WARNING(priv, src, fmt, args...) +#define LOG_INFO(priv, src, fmt, args...) +#define LOG_INFOEX(priv, src, fmt, args...) 
+#define LOG_HEXDUMP(src, ptr, len) + +static inline void iwmct_log_top_message(struct iwmct_priv *priv, + u8 *buf, int len) {} +static inline int iwmct_log_set_filter(u8 src, u8 logmask) { return 0; } +static inline int iwmct_log_set_fw_filter(u8 src, u8 logmask) { return 0; } + +#endif /* CONFIG_IWMC3200TOP_DEBUG */ + +int log_get_filter_str(char *buf, int size); +int log_get_fw_filter_str(char *buf, int size); + +#endif /* __LOG_H__ */ diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c new file mode 100644 index 00000000000..fafcaa481d7 --- /dev/null +++ b/drivers/misc/iwmc3200top/main.c @@ -0,0 +1,678 @@ +/* + * iwmc3200top - Intel Wireless MultiCom 3200 Top Driver + * drivers/misc/iwmc3200top/main.c + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Author Name: Maxim Grabarnik <maxim.grabarnink@intel.com> + * - + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/mmc/sdio_ids.h> +#include <linux/mmc/sdio_func.h> +#include <linux/mmc/sdio.h> + +#include "iwmc3200top.h" +#include "log.h" +#include "fw-msg.h" +#include "debugfs.h" + + +#define DRIVER_DESCRIPTION "Intel(R) IWMC 3200 Top Driver" +#define DRIVER_COPYRIGHT "Copyright (c) 2008 Intel Corporation." 
+ +#define DRIVER_VERSION "0.1.62" + +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR(DRIVER_COPYRIGHT); +MODULE_FIRMWARE(FW_NAME(FW_API_VER)); + +/* + * This workers main task is to wait for OP_OPR_ALIVE + * from TOP FW until ALIVE_MSG_TIMOUT timeout is elapsed. + * When OP_OPR_ALIVE received it will issue + * a call to "bus_rescan_devices". + */ +static void iwmct_rescan_worker(struct work_struct *ws) +{ + struct iwmct_priv *priv; + int ret; + + priv = container_of(ws, struct iwmct_priv, bus_rescan_worker); + + LOG_INFO(priv, FW_MSG, "Calling bus_rescan\n"); + + ret = bus_rescan_devices(priv->func->dev.bus); + if (ret < 0) + LOG_INFO(priv, FW_DOWNLOAD, "bus_rescan_devices FAILED!!!\n"); +} + +static void op_top_message(struct iwmct_priv *priv, struct top_msg *msg) +{ + switch (msg->hdr.opcode) { + case OP_OPR_ALIVE: + LOG_INFO(priv, FW_MSG, "Got ALIVE from device, wake rescan\n"); + queue_work(priv->bus_rescan_wq, &priv->bus_rescan_worker); + break; + default: + LOG_INFO(priv, FW_MSG, "Received msg opcode 0x%X\n", + msg->hdr.opcode); + break; + } +} + + +static void handle_top_message(struct iwmct_priv *priv, u8 *buf, int len) +{ + struct top_msg *msg; + + msg = (struct top_msg *)buf; + + if (msg->hdr.type != COMM_TYPE_D2H) { + LOG_ERROR(priv, FW_MSG, + "Message from TOP with invalid message type 0x%X\n", + msg->hdr.type); + return; + } + + if (len < sizeof(msg->hdr)) { + LOG_ERROR(priv, FW_MSG, + "Message from TOP is too short for message header " + "received %d bytes, expected at least %zd bytes\n", + len, sizeof(msg->hdr)); + return; + } + + if (len < le16_to_cpu(msg->hdr.length) + sizeof(msg->hdr)) { + LOG_ERROR(priv, FW_MSG, + "Message length (%d bytes) is shorter than " + "in header (%d bytes)\n", + len, le16_to_cpu(msg->hdr.length)); + return; + } + + switch (msg->hdr.category) { + case COMM_CATEGORY_OPERATIONAL: + op_top_message(priv, (struct top_msg *)buf); + break; + + case 
COMM_CATEGORY_DEBUG: + case COMM_CATEGORY_TESTABILITY: + case COMM_CATEGORY_DIAGNOSTICS: + iwmct_log_top_message(priv, buf, len); + break; + + default: + LOG_ERROR(priv, FW_MSG, + "Message from TOP with unknown category 0x%X\n", + msg->hdr.category); + break; + } +} + +int iwmct_send_hcmd(struct iwmct_priv *priv, u8 *cmd, u16 len) +{ + int ret; + u8 *buf; + + LOG_INFOEX(priv, FW_MSG, "Sending hcmd:\n"); + + /* add padding to 256 for IWMC */ + ((struct top_msg *)cmd)->hdr.flags |= CMD_FLAG_PADDING_256; + + LOG_HEXDUMP(FW_MSG, cmd, len); + + if (len > FW_HCMD_BLOCK_SIZE) { + LOG_ERROR(priv, FW_MSG, "size %d exceeded hcmd max size %d\n", + len, FW_HCMD_BLOCK_SIZE); + return -1; + } + + buf = kzalloc(FW_HCMD_BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + LOG_ERROR(priv, FW_MSG, "kzalloc error, buf size %d\n", + FW_HCMD_BLOCK_SIZE); + return -1; + } + + memcpy(buf, cmd, len); + + sdio_claim_host(priv->func); + ret = sdio_memcpy_toio(priv->func, IWMC_SDIO_DATA_ADDR, buf, + FW_HCMD_BLOCK_SIZE); + sdio_release_host(priv->func); + + kfree(buf); + return ret; +} + +int iwmct_tx(struct iwmct_priv *priv, unsigned int addr, + void *src, int count) +{ + int ret; + + sdio_claim_host(priv->func); + ret = sdio_memcpy_toio(priv->func, addr, src, count); + sdio_release_host(priv->func); + + return ret; +} + +static void iwmct_irq_read_worker(struct work_struct *ws) +{ + struct iwmct_priv *priv; + struct iwmct_work_struct *read_req; + __le32 *buf = NULL; + int ret; + int iosize; + u32 barker; + bool is_barker; + + priv = container_of(ws, struct iwmct_priv, isr_worker); + + LOG_INFO(priv, IRQ, "enter iwmct_irq_read_worker %p\n", ws); + + /* --------------------- Handshake with device -------------------- */ + sdio_claim_host(priv->func); + + /* all list manipulations have to be protected by + * sdio_claim_host/sdio_release_host */ + if (list_empty(&priv->read_req_list)) { + LOG_ERROR(priv, IRQ, "read_req_list empty in read worker\n"); + goto exit_release; + } + + read_req = 
list_entry(priv->read_req_list.next, + struct iwmct_work_struct, list); + + list_del(&read_req->list); + iosize = read_req->iosize; + kfree(read_req); + + buf = kzalloc(iosize, GFP_KERNEL); + if (!buf) { + LOG_ERROR(priv, IRQ, "kzalloc error, buf size %d\n", iosize); + goto exit_release; + } + + LOG_INFO(priv, IRQ, "iosize=%d, buf=%p, func=%d\n", + iosize, buf, priv->func->num); + + /* read from device */ + ret = sdio_memcpy_fromio(priv->func, buf, IWMC_SDIO_DATA_ADDR, iosize); + if (ret) { + LOG_ERROR(priv, IRQ, "error %d reading buffer\n", ret); + goto exit_release; + } + + LOG_HEXDUMP(IRQ, (u8 *)buf, iosize); + + barker = le32_to_cpu(buf[0]); + + /* Verify whether it's a barker and if not - treat as regular Rx */ + if (barker == IWMC_BARKER_ACK || + (barker & BARKER_DNLOAD_BARKER_MSK) == IWMC_BARKER_REBOOT) { + + /* Valid Barker is equal on first 4 dwords */ + is_barker = (buf[1] == buf[0]) && + (buf[2] == buf[0]) && + (buf[3] == buf[0]); + + if (!is_barker) { + LOG_WARNING(priv, IRQ, + "Potentially inconsistent barker " + "%08X_%08X_%08X_%08X\n", + le32_to_cpu(buf[0]), le32_to_cpu(buf[1]), + le32_to_cpu(buf[2]), le32_to_cpu(buf[3])); + } + } else { + is_barker = false; + } + + /* Handle Top CommHub message */ + if (!is_barker) { + sdio_release_host(priv->func); + handle_top_message(priv, (u8 *)buf, iosize); + goto exit; + } else if (barker == IWMC_BARKER_ACK) { /* Handle barkers */ + if (atomic_read(&priv->dev_sync) == 0) { + LOG_ERROR(priv, IRQ, + "ACK barker arrived out-of-sync\n"); + goto exit_release; + } + + /* Continuing to FW download (after Sync is completed)*/ + atomic_set(&priv->dev_sync, 0); + LOG_INFO(priv, IRQ, "ACK barker arrived " + "- starting FW download\n"); + } else { /* REBOOT barker */ + LOG_INFO(priv, IRQ, "Recieved reboot barker: %x\n", barker); + priv->barker = barker; + + if (barker & BARKER_DNLOAD_SYNC_MSK) { + /* Send the same barker back */ + ret = sdio_memcpy_toio(priv->func, IWMC_SDIO_DATA_ADDR, + buf, iosize); + if (ret) { + 
LOG_ERROR(priv, IRQ, + "error %d echoing barker\n", ret); + goto exit_release; + } + LOG_INFO(priv, IRQ, "Echoing barker to device\n"); + atomic_set(&priv->dev_sync, 1); + goto exit_release; + } + + /* Continuing to FW download (without Sync) */ + LOG_INFO(priv, IRQ, "No sync requested " + "- starting FW download\n"); + } + + sdio_release_host(priv->func); + + + LOG_INFO(priv, IRQ, "barker download request 0x%x is:\n", priv->barker); + LOG_INFO(priv, IRQ, "******* Top FW %s requested ********\n", + (priv->barker & BARKER_DNLOAD_TOP_MSK) ? "was" : "not"); + LOG_INFO(priv, IRQ, "******* GPS FW %s requested ********\n", + (priv->barker & BARKER_DNLOAD_GPS_MSK) ? "was" : "not"); + LOG_INFO(priv, IRQ, "******* BT FW %s requested ********\n", + (priv->barker & BARKER_DNLOAD_BT_MSK) ? "was" : "not"); + + if (priv->dbg.fw_download) + iwmct_fw_load(priv); + else + LOG_ERROR(priv, IRQ, "FW download not allowed\n"); + + goto exit; + +exit_release: + sdio_release_host(priv->func); +exit: + kfree(buf); + LOG_INFO(priv, IRQ, "exit iwmct_irq_read_worker\n"); +} + +static void iwmct_irq(struct sdio_func *func) +{ + struct iwmct_priv *priv; + int val, ret; + int iosize; + int addr = IWMC_SDIO_INTR_GET_SIZE_ADDR; + struct iwmct_work_struct *read_req; + + priv = sdio_get_drvdata(func); + + LOG_INFO(priv, IRQ, "enter iwmct_irq\n"); + + /* read the function's status register */ + val = sdio_readb(func, IWMC_SDIO_INTR_STATUS_ADDR, &ret); + + LOG_INFO(priv, IRQ, "iir value = %d, ret=%d\n", val, ret); + + if (!val) { + LOG_ERROR(priv, IRQ, "iir = 0, exiting ISR\n"); + goto exit_clear_intr; + } + + + /* + * read 2 bytes of the transaction size + * IMPORTANT: sdio transaction size has to be read before clearing + * sdio interrupt!!! 
+ */ + val = sdio_readb(priv->func, addr++, &ret); + iosize = val; + val = sdio_readb(priv->func, addr++, &ret); + iosize += val << 8; + + LOG_INFO(priv, IRQ, "READ size %d\n", iosize); + + if (iosize == 0) { + LOG_ERROR(priv, IRQ, "READ size %d, exiting ISR\n", iosize); + goto exit_clear_intr; + } + + /* allocate a work structure to pass iosize to the worker */ + read_req = kzalloc(sizeof(struct iwmct_work_struct), GFP_KERNEL); + if (!read_req) { + LOG_ERROR(priv, IRQ, "failed to allocate read_req, exit ISR\n"); + goto exit_clear_intr; + } + + INIT_LIST_HEAD(&read_req->list); + read_req->iosize = iosize; + + /* list_add_tail(new, head): append the new request to the tail of + * priv->read_req_list, the queue the read worker takes its + * requests from; requests already queued stay on the list */ + list_add_tail(&read_req->list, &priv->read_req_list); + + /* clear the function's interrupt request bit (write 1 to clear) */ + sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret); + + queue_work(priv->wq, &priv->isr_worker); + + LOG_INFO(priv, IRQ, "exit iwmct_irq\n"); + + return; + +exit_clear_intr: + /* clear the function's interrupt request bit (write 1 to clear) */ + sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret); +} + + +static int blocks; +module_param(blocks, int, 0604); +MODULE_PARM_DESC(blocks, "max_blocks_to_send"); + +static int dump; +module_param(dump, bool, 0604); +MODULE_PARM_DESC(dump, "dump_hex_content"); + +static int jump = 1; +module_param(jump, bool, 0604); + +static int direct = 1; +module_param(direct, bool, 0604); + +static int checksum = 1; +module_param(checksum, bool, 0604); + +static int fw_download = 1; +module_param(fw_download, bool, 0604); + +static int block_size = IWMC_SDIO_BLK_SIZE; +module_param(block_size, int, 0404); + +static int download_trans_blks = IWMC_DEFAULT_TR_BLK; +module_param(download_trans_blks, int, 0604); + +static int rubbish_barker; +module_param(rubbish_barker, bool, 0604); + +#ifdef CONFIG_IWMC3200TOP_DEBUG +static int log_level[LOG_SRC_MAX]; +static unsigned int log_level_argc; +module_param_array(log_level, int, &log_level_argc, 0604); +MODULE_PARM_DESC(log_level, "log_level"); + +static int
log_level_fw[FW_LOG_SRC_MAX]; +static unsigned int log_level_fw_argc; +module_param_array(log_level_fw, int, &log_level_fw_argc, 0604); +MODULE_PARM_DESC(log_level_fw, "log_level_fw"); +#endif + +void iwmct_dbg_init_params(struct iwmct_priv *priv) +{ +#ifdef CONFIG_IWMC3200TOP_DEBUG + int i; + + for (i = 0; i < log_level_argc; i++) { + dev_notice(&priv->func->dev, "log_level[%d]=0x%X\n", + i, log_level[i]); + iwmct_log_set_filter((log_level[i] >> 8) & 0xFF, + log_level[i] & 0xFF); + } + for (i = 0; i < log_level_fw_argc; i++) { + dev_notice(&priv->func->dev, "log_level_fw[%d]=0x%X\n", + i, log_level_fw[i]); + iwmct_log_set_fw_filter((log_level_fw[i] >> 8) & 0xFF, + log_level_fw[i] & 0xFF); + } +#endif + + priv->dbg.blocks = blocks; + LOG_INFO(priv, INIT, "blocks=%d\n", blocks); + priv->dbg.dump = (bool)dump; + LOG_INFO(priv, INIT, "dump=%d\n", dump); + priv->dbg.jump = (bool)jump; + LOG_INFO(priv, INIT, "jump=%d\n", jump); + priv->dbg.direct = (bool)direct; + LOG_INFO(priv, INIT, "direct=%d\n", direct); + priv->dbg.checksum = (bool)checksum; + LOG_INFO(priv, INIT, "checksum=%d\n", checksum); + priv->dbg.fw_download = (bool)fw_download; + LOG_INFO(priv, INIT, "fw_download=%d\n", fw_download); + priv->dbg.block_size = block_size; + LOG_INFO(priv, INIT, "block_size=%d\n", block_size); + priv->dbg.download_trans_blks = download_trans_blks; + LOG_INFO(priv, INIT, "download_trans_blks=%d\n", download_trans_blks); +} + +/***************************************************************************** + * + * sysfs attributes + * + *****************************************************************************/ +static ssize_t show_iwmct_fw_version(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct iwmct_priv *priv = dev_get_drvdata(d); + return sprintf(buf, "%s\n", priv->dbg.label_fw); +} +static DEVICE_ATTR(cc_label_fw, S_IRUGO, show_iwmct_fw_version, NULL); + +#ifdef CONFIG_IWMC3200TOP_DEBUG +static DEVICE_ATTR(log_level, S_IWUSR | S_IRUGO, + 
show_iwmct_log_level, store_iwmct_log_level); +static DEVICE_ATTR(log_level_fw, S_IWUSR | S_IRUGO, + show_iwmct_log_level_fw, store_iwmct_log_level_fw); +#endif + +static struct attribute *iwmct_sysfs_entries[] = { + &dev_attr_cc_label_fw.attr, +#ifdef CONFIG_IWMC3200TOP_DEBUG + &dev_attr_log_level.attr, + &dev_attr_log_level_fw.attr, +#endif + NULL +}; + +static struct attribute_group iwmct_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = iwmct_sysfs_entries, +}; + + +static int iwmct_probe(struct sdio_func *func, + const struct sdio_device_id *id) +{ + struct iwmct_priv *priv; + int ret; + int val = 1; + int addr = IWMC_SDIO_INTR_ENABLE_ADDR; + + dev_dbg(&func->dev, "enter iwmct_probe\n"); + + dev_dbg(&func->dev, "IRQ polling period id %u msecs, HZ is %d\n", + jiffies_to_msecs(2147483647), HZ); + + priv = kzalloc(sizeof(struct iwmct_priv), GFP_KERNEL); + if (!priv) { + dev_err(&func->dev, "kzalloc error\n"); + return -ENOMEM; + } + priv->func = func; + sdio_set_drvdata(func, priv); + + + /* create drivers work queue */ + priv->wq = create_workqueue(DRV_NAME "_wq"); + priv->bus_rescan_wq = create_workqueue(DRV_NAME "_rescan_wq"); + INIT_WORK(&priv->bus_rescan_worker, iwmct_rescan_worker); + INIT_WORK(&priv->isr_worker, iwmct_irq_read_worker); + + init_waitqueue_head(&priv->wait_q); + + sdio_claim_host(func); + /* FIXME: Remove after it is fixed in the Boot ROM upgrade */ + func->enable_timeout = 10; + + /* In our HW, setting the block size also wakes up the boot rom. 
*/ + ret = sdio_set_block_size(func, priv->dbg.block_size); + if (ret) { + LOG_ERROR(priv, INIT, + "sdio_set_block_size() failure: %d\n", ret); + goto error_sdio_enable; + } + + ret = sdio_enable_func(func); + if (ret) { + LOG_ERROR(priv, INIT, "sdio_enable_func() failure: %d\n", ret); + goto error_sdio_enable; + } + + /* init reset and dev_sync states */ + atomic_set(&priv->reset, 0); + atomic_set(&priv->dev_sync, 0); + + /* init read req queue */ + INIT_LIST_HEAD(&priv->read_req_list); + + /* process configurable parameters */ + iwmct_dbg_init_params(priv); + ret = sysfs_create_group(&func->dev.kobj, &iwmct_attribute_group); + if (ret) { + LOG_ERROR(priv, INIT, "Failed to register attributes and " + "initialize module_params\n"); + goto error_dev_attrs; + } + + iwmct_dbgfs_register(priv, DRV_NAME); + + if (!priv->dbg.direct && priv->dbg.download_trans_blks > 8) { + LOG_INFO(priv, INIT, + "Reducing transaction to 8 blocks = 2K (from %d)\n", + priv->dbg.download_trans_blks); + priv->dbg.download_trans_blks = 8; + } + priv->trans_len = priv->dbg.download_trans_blks * priv->dbg.block_size; + LOG_INFO(priv, INIT, "Transaction length = %d\n", priv->trans_len); + + ret = sdio_claim_irq(func, iwmct_irq); + if (ret) { + LOG_ERROR(priv, INIT, "sdio_claim_irq() failure: %d\n", ret); + goto error_claim_irq; + } + + + /* Enable function's interrupt */ + sdio_writeb(priv->func, val, addr, &ret); + if (ret) { + LOG_ERROR(priv, INIT, "Failure writing to " + "Interrupt Enable Register (%d): %d\n", addr, ret); + goto error_enable_int; + } + + sdio_release_host(func); + + LOG_INFO(priv, INIT, "exit iwmct_probe\n"); + + return ret; + +error_enable_int: + sdio_release_irq(func); +error_claim_irq: + sdio_disable_func(func); +error_dev_attrs: + iwmct_dbgfs_unregister(priv->dbgfs); + sysfs_remove_group(&func->dev.kobj, &iwmct_attribute_group); +error_sdio_enable: + sdio_release_host(func); + return ret; +} + +static void iwmct_remove(struct sdio_func *func) +{ + struct iwmct_work_struct 
*read_req; + struct iwmct_priv *priv = sdio_get_drvdata(func); + + priv = sdio_get_drvdata(func); + + LOG_INFO(priv, INIT, "enter\n"); + + sdio_claim_host(func); + sdio_release_irq(func); + sdio_release_host(func); + + /* Safely destroy osc workqueue */ + destroy_workqueue(priv->bus_rescan_wq); + destroy_workqueue(priv->wq); + + sdio_claim_host(func); + sdio_disable_func(func); + sysfs_remove_group(&func->dev.kobj, &iwmct_attribute_group); + iwmct_dbgfs_unregister(priv->dbgfs); + sdio_release_host(func); + + /* free read requests */ + while (!list_empty(&priv->read_req_list)) { + read_req = list_entry(priv->read_req_list.next, + struct iwmct_work_struct, list); + + list_del(&read_req->list); + kfree(read_req); + } + + kfree(priv); +} + + +static const struct sdio_device_id iwmct_ids[] = { + /* Intel Wireless MultiCom 3200 Top Driver */ + { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, 0x1404)}, + { }, /* Terminating entry */ +}; + +MODULE_DEVICE_TABLE(sdio, iwmct_ids); + +static struct sdio_driver iwmct_driver = { + .probe = iwmct_probe, + .remove = iwmct_remove, + .name = DRV_NAME, + .id_table = iwmct_ids, +}; + +static int __init iwmct_init(void) +{ + int rc; + + /* Default log filter settings */ + iwmct_log_set_filter(LOG_SRC_ALL, LOG_SEV_FILTER_RUNTIME); + iwmct_log_set_filter(LOG_SRC_FW_MSG, LOG_SEV_FILTER_ALL); + iwmct_log_set_fw_filter(LOG_SRC_ALL, FW_LOG_SEV_FILTER_RUNTIME); + + rc = sdio_register_driver(&iwmct_driver); + + return rc; +} + +static void __exit iwmct_exit(void) +{ + sdio_unregister_driver(&iwmct_driver); +} + +module_init(iwmct_init); +module_exit(iwmct_exit); + diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c new file mode 100644 index 00000000000..fcb6ec1af17 --- /dev/null +++ b/drivers/misc/kgdbts.c @@ -0,0 +1,1126 @@ +/* + * kgdbts is a test suite for kgdb for the sole purpose of validating + * that key pieces of the kgdb internals are working properly such as + * HW/SW breakpoints, single stepping, and NMI. 
+ * + * Created by: Jason Wessel <jason.wessel@windriver.com> + * + * Copyright (c) 2008 Wind River Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* Information about the kgdb test suite. + * ------------------------------------- + * + * The kgdb test suite is designed as a KGDB I/O module which + * simulates the communications that a debugger would have with kgdb. + * The tests are broken up in to a line by line and referenced here as + * a "get" which is kgdb requesting input and "put" which is kgdb + * sending a response. + * + * The kgdb suite can be invoked from the kernel command line + * arguments system or executed dynamically at run time. The test + * suite uses the variable "kgdbts" to obtain the information about + * which tests to run and to configure the verbosity level. 
The + * following are the various characters you can use with the kgdbts= + * line: + * + * When using the "kgdbts=" you only choose one of the following core + * test types: + * A = Run all the core tests silently + * V1 = Run all the core tests with minimal output + * V2 = Run all the core tests in debug mode + * + * You can also specify optional tests: + * N## = Go to sleep with interrupts off for ## seconds + * to test the HW NMI watchdog + * F## = Break at do_fork for ## iterations + * S## = Break at sys_open for ## iterations + * I## = Run the single step test ## iterations + * + * NOTE: the do_fork and sys_open tests are mutually exclusive. + * + * To invoke the kgdb test suite from boot you use a kernel start + * argument as follows: + * kgdbts=V1 kgdbwait + * Or if you wanted to perform the NMI test for 6 seconds and do_fork + * test for 100 forks, you could use: + * kgdbts=V1N6F100 kgdbwait + * + * The test suite can also be invoked at run time with: + * echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts + * Or as another example: + * echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts + * + * When developing a new kgdb arch specific implementation or + * using these tests for the purpose of regression testing, + * several invocations are required. + * + * 1) Boot with the test suite enabled by using the kernel arguments + * "kgdbts=V1F100 kgdbwait" + * ## If kgdb arch specific implementation has NMI use + * "kgdbts=V1N6F100 kgdbwait" + * + * 2) After the system boot run the basic test. + * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts + * + * 3) Run the concurrency tests. It is best to use n+1 + * while loops where n is the number of cpus you have + * in your system. The example below uses only two + * loops.
+ * + * ## This tests break points on sys_open + * while [ 1 ] ; do find / > /dev/null 2>&1 ; done & + * while [ 1 ] ; do find / > /dev/null 2>&1 ; done & + * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts + * fg # and hit control-c + * fg # and hit control-c + * ## This tests break points on do_fork + * while [ 1 ] ; do date > /dev/null ; done & + * while [ 1 ] ; do date > /dev/null ; done & + * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts + * fg # and hit control-c + * + */ + +#include <linux/kernel.h> +#include <linux/kgdb.h> +#include <linux/ctype.h> +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/nmi.h> +#include <linux/delay.h> +#include <linux/kthread.h> + +#define v1printk(a...) do { \ + if (verbose) \ + printk(KERN_INFO a); \ + } while (0) +#define v2printk(a...) do { \ + if (verbose > 1) \ + printk(KERN_INFO a); \ + touch_nmi_watchdog(); \ + } while (0) +#define eprintk(a...) do { \ + printk(KERN_ERR a); \ + WARN_ON(1); \ + } while (0) +#define MAX_CONFIG_LEN 40 + +static struct kgdb_io kgdbts_io_ops; +static char get_buf[BUFMAX]; +static int get_buf_cnt; +static char put_buf[BUFMAX]; +static int put_buf_cnt; +static char scratch_buf[BUFMAX]; +static int verbose; +static int repeat_test; +static int test_complete; +static int send_ack; +static int final_ack; +static int force_hwbrks; +static int hwbreaks_ok; +static int hw_break_val; +static int hw_break_val2; +#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC) +static int arch_needs_sstep_emulation = 1; +#else +static int arch_needs_sstep_emulation; +#endif +static unsigned long sstep_addr; +static int sstep_state; + +/* Storage for the registers, in GDB format. */ +static unsigned long kgdbts_gdb_regs[(NUMREGBYTES + + sizeof(unsigned long) - 1) / + sizeof(unsigned long)]; +static struct pt_regs kgdbts_regs; + +/* -1 = init not run yet, 0 = unconfigured, 1 = configured. 
*/ +static int configured = -1; + +#ifdef CONFIG_KGDB_TESTS_BOOT_STRING +static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING; +#else +static char config[MAX_CONFIG_LEN]; +#endif +static struct kparam_string kps = { + .string = config, + .maxlen = MAX_CONFIG_LEN, +}; + +static void fill_get_buf(char *buf); + +struct test_struct { + char *get; + char *put; + void (*get_handler)(char *); + int (*put_handler)(char *, char *); +}; + +struct test_state { + char *name; + struct test_struct *tst; + int idx; + int (*run_test) (int, int); + int (*validate_put) (char *); +}; + +static struct test_state ts; + +static int kgdbts_unreg_thread(void *ptr) +{ + /* Wait until the tests are complete and then ungresiter the I/O + * driver. + */ + while (!final_ack) + msleep_interruptible(1500); + + if (configured) + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; + + return 0; +} + +/* This is noinline such that it can be used for a single location to + * place a breakpoint + */ +static noinline void kgdbts_break_test(void) +{ + v2printk("kgdbts: breakpoint complete\n"); +} + +/* Lookup symbol info in the kernel */ +static unsigned long lookup_addr(char *arg) +{ + unsigned long addr = 0; + + if (!strcmp(arg, "kgdbts_break_test")) + addr = (unsigned long)kgdbts_break_test; + else if (!strcmp(arg, "sys_open")) + addr = (unsigned long)sys_open; + else if (!strcmp(arg, "do_fork")) + addr = (unsigned long)do_fork; + else if (!strcmp(arg, "hw_break_val")) + addr = (unsigned long)&hw_break_val; + return addr; +} + +static void break_helper(char *bp_type, char *arg, unsigned long vaddr) +{ + unsigned long addr; + + if (arg) + addr = lookup_addr(arg); + else + addr = vaddr; + + sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr, + BREAK_INSTR_SIZE); + fill_get_buf(scratch_buf); +} + +static void sw_break(char *arg) +{ + break_helper(force_hwbrks ? "Z1" : "Z0", arg, 0); +} + +static void sw_rem_break(char *arg) +{ + break_helper(force_hwbrks ? 
"z1" : "z0", arg, 0); +} + +static void hw_break(char *arg) +{ + break_helper("Z1", arg, 0); +} + +static void hw_rem_break(char *arg) +{ + break_helper("z1", arg, 0); +} + +static void hw_write_break(char *arg) +{ + break_helper("Z2", arg, 0); +} + +static void hw_rem_write_break(char *arg) +{ + break_helper("z2", arg, 0); +} + +static void hw_access_break(char *arg) +{ + break_helper("Z4", arg, 0); +} + +static void hw_rem_access_break(char *arg) +{ + break_helper("z4", arg, 0); +} + +static void hw_break_val_access(void) +{ + hw_break_val2 = hw_break_val; +} + +static void hw_break_val_write(void) +{ + hw_break_val++; +} + +static int check_and_rewind_pc(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + int offset = 0; + + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Stopped at IP: %lx\n", instruction_pointer(&kgdbts_regs)); +#ifdef CONFIG_X86 + /* On x86 a breakpoint stop requires it to be decremented */ + if (addr + 1 == kgdbts_regs.ip) + offset = -1; +#endif + if (strcmp(arg, "silent") && + instruction_pointer(&kgdbts_regs) + offset != addr) { + eprintk("kgdbts: BP mismatch %lx expected %lx\n", + instruction_pointer(&kgdbts_regs) + offset, addr); + return 1; + } +#ifdef CONFIG_X86 + /* On x86 adjust the instruction pointer if needed */ + kgdbts_regs.ip += offset; +#endif + return 0; +} + +static int check_single_step(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + /* + * From an arch indepent point of view the instruction pointer + * should be on a different instruction + */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Singlestep stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + if (instruction_pointer(&kgdbts_regs) == addr) { + eprintk("kgdbts: SingleStep failed at %lx\n", + instruction_pointer(&kgdbts_regs)); + return 1; + } 
+ + return 0; +} + +static void write_regs(char *arg) +{ + memset(scratch_buf, 0, sizeof(scratch_buf)); + scratch_buf[0] = 'G'; + pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs); + kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES); + fill_get_buf(scratch_buf); +} + +static void skip_back_repeat_test(char *arg) +{ + int go_back = simple_strtol(arg, NULL, 10); + + repeat_test--; + if (repeat_test <= 0) + ts.idx++; + else + ts.idx -= go_back; + fill_get_buf(ts.tst[ts.idx].get); +} + +static int got_break(char *put_str, char *arg) +{ + test_complete = 1; + if (!strncmp(put_str+1, arg, 2)) { + if (!strncmp(arg, "T0", 2)) + test_complete = 2; + return 0; + } + return 1; +} + +static void emul_sstep_get(char *arg) +{ + if (!arch_needs_sstep_emulation) { + fill_get_buf(arg); + return; + } + switch (sstep_state) { + case 0: + v2printk("Emulate single step\n"); + /* Start by looking at the current PC */ + fill_get_buf("g"); + break; + case 1: + /* set breakpoint */ + break_helper("Z0", NULL, sstep_addr); + break; + case 2: + /* Continue */ + fill_get_buf("c"); + break; + case 3: + /* Clear breakpoint */ + break_helper("z0", NULL, sstep_addr); + break; + default: + eprintk("kgdbts: ERROR failed sstep get emulation\n"); + } + sstep_state++; +} + +static int emul_sstep_put(char *put_str, char *arg) +{ + if (!arch_needs_sstep_emulation) { + if (!strncmp(put_str+1, arg, 2)) + return 0; + return 1; + } + switch (sstep_state) { + case 1: + /* validate the "g" packet to get the IP */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + /* Want to stop at IP + break instruction size by default */ + sstep_addr = instruction_pointer(&kgdbts_regs) + + BREAK_INSTR_SIZE; + break; + case 2: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break set\n"); + return 1; + } + break; + case 3: + if 
(strncmp(put_str, "$T0", 3)) { + eprintk("kgdbts: failed continue sstep\n"); + return 1; + } + break; + case 4: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break unset\n"); + return 1; + } + /* Single step is complete so continue on! */ + sstep_state = 0; + return 0; + default: + eprintk("kgdbts: ERROR failed sstep put emulation\n"); + } + + /* Continue on the same test line until emulation is complete */ + ts.idx--; + return 0; +} + +static int final_ack_set(char *put_str, char *arg) +{ + if (strncmp(put_str+1, arg, 2)) + return 1; + final_ack = 1; + return 0; +} +/* + * Test to plant a breakpoint and detach, which should clear out the + * breakpoint and restore the original instruction. + */ +static struct test_struct plant_and_detach_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "D", "OK" }, /* Detach */ + { "", "" }, +}; + +/* + * Simple test to write in a software breakpoint, check for the + * correct stop location and detach. 
+ */ +static struct test_struct sw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs }, /* Write registers */ + { "kgdbts_break_test", "OK", sw_rem_break }, /* remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, /* end of test marker */ +}; + +/* + * Test a known bad memory read location (address 0) to test the fault + * handler: each "m0,N" packet asks for N bytes starting at address 0 + * and an error reply ("E*") is expected for all of them. + */ +static struct test_struct bad_read_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "m0,1", "E*" }, /* read 1 byte at address 0 */ + { "m0,2", "E*" }, /* read 2 bytes at address 0 */ + { "m0,3", "E*" }, /* read 3 bytes at address 0 */ + { "m0,4", "E*" }, /* read 4 bytes at address 0 */ + { "m0,5", "E*" }, /* read 5 bytes at address 0 */ + { "m0,6", "E*" }, /* read 6 bytes at address 0 */ + { "m0,7", "E*" }, /* read 7 bytes at address 0 */ + { "m0,8", "E*" }, /* read 8 bytes at address 0 */ + { "D", "OK" }, /* Detach which removes all breakpoints and continues */ + { "", "" }, /* end of test marker */ +}; + +/* + * Test for hitting a breakpoint, remove it, single step, plant it + * again and detach.
+ */ +static struct test_struct singlestep_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "kgdbts_break_test", NULL, check_single_step }, + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "D", "OK" }, /* Remove all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint at do_fork for what ever the number + * of iterations required by the variable repeat_test. + */ +static struct test_struct do_fork_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "do_fork", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs }, /* Write registers */ + { "do_fork", "OK", sw_rem_break }, /*remove breakpoint */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "do_fork", NULL, check_single_step }, + { "do_fork", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "" }, +}; + +/* Test for hitting a breakpoint at sys_open for what ever the number + * of iterations required by the variable repeat_test. 
+ */ +static struct test_struct sys_open_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "sys_open", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs }, /* Write registers */ + { "sys_open", "OK", sw_rem_break }, /*remove breakpoint */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "sys_open", NULL, check_single_step }, + { "sys_open", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "" }, +}; + +/* + * Test for hitting a simple hw breakpoint + */ +static struct test_struct hw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw write breakpoint + */ +static struct test_struct hw_write_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct hw_access_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */ + 
{ "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_access_break }, /* remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test sequence for the NMI sleep test (see the N## option in the + * header of this file): continue, expect a stop reply ("T0*") and + * detach. No breakpoint is planted by this sequence. + */ +static struct test_struct nmi_sleep_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Build the packet "$<buf>#<checksum>" in get_buf. The checksum is the + * sum of the characters of buf kept in an unsigned char and stored + * via hex_asc_hi() and hex_asc_lo() as two characters after the '#'. + */ +static void fill_get_buf(char *buf) +{ + unsigned char checksum = 0; + int count = 0; + char ch; + + strcpy(get_buf, "$"); + strcat(get_buf, buf); + while ((ch = buf[count])) { + checksum += ch; + count++; + } + strcat(get_buf, "#"); + /* get_buf is '$' + count characters + '#', so the two checksum + * characters go at count + 2 and count + 3 */ + get_buf[count + 2] = hex_asc_hi(checksum); + get_buf[count + 3] = hex_asc_lo(checksum); + get_buf[count + 4] = '\0'; + v2printk("get%i: %s\n", ts.idx, get_buf); +} + +/* + * Check the packet put_str against the current test line. + * If the line has a put_handler, its result is returned. Otherwise + * put_str (without a leading '$') is compared with the expected "put" + * string of the line. Returns 0 on a match and 1 on a mismatch. + */ +static int validate_simple_test(char *put_str) +{ + char *chk_str; + + if (ts.tst[ts.idx].put_handler) + return ts.tst[ts.idx].put_handler(put_str, + ts.tst[ts.idx].put); + + chk_str = ts.tst[ts.idx].put; + if (*put_str == '$') + put_str++; + + while (*chk_str != '\0' && *put_str != '\0') { + /* If someone does a * to match the rest of the string, allow + * it, or stop if the received string is complete. + */ + if (*put_str == '#' || *chk_str == '*') + return 0; + if (*put_str != *chk_str) + return 1; + + chk_str++; + put_str++; + } + if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#')) + return 0; + + return 1; +} + +/* + * Character callback of the simple tests. When is_get_char is set, the + * next character of the packet in get_buf (or a pending '+' ACK) is + * returned; otherwise chr is handled as a character put by kgdb. + */ +static int run_simple_test(int is_get_char, int chr) +{ + int ret = 0; + if (is_get_char) { + /* Send an ACK on the get if a prior put completed and set the + * send ack variable + */ + if (send_ack) { + send_ack = 0; + return '+'; + } + /* On the first get char, fill the transmit buffer and then + * take from the get_string.
+ */ + if (get_buf_cnt == 0) { + if (ts.tst[ts.idx].get_handler) + ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get); + else + fill_get_buf(ts.tst[ts.idx].get); + } + + if (get_buf[get_buf_cnt] == '\0') { + eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n", + ts.name, ts.idx); + get_buf_cnt = 0; + fill_get_buf("D"); + } + ret = get_buf[get_buf_cnt]; + get_buf_cnt++; + return ret; + } + + /* This callback is a put char which is when kgdb sends data to + * this I/O module. + */ + if (ts.tst[ts.idx].get[0] == '\0' && + ts.tst[ts.idx].put[0] == '\0') { + eprintk("kgdbts: ERROR: beyond end of test on" + " '%s' line %i\n", ts.name, ts.idx); + return 0; + } + + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + /* Ignore everything until the first valid packet start '$' */ + if (put_buf_cnt == 0 && chr != '$') + return 0; + + put_buf[put_buf_cnt] = chr; + put_buf_cnt++; + + /* End of packet == #XX so look for the '#' */ + if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') { + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + put_buf[put_buf_cnt] = '\0'; + v2printk("put%i: %s\n", ts.idx, put_buf); + /* Trigger check here */ + if (ts.validate_put && ts.validate_put(put_buf)) { + eprintk("kgdbts: ERROR PUT: end of test " + "buffer on '%s' line %i expected %s got %s\n", + ts.name, ts.idx, ts.tst[ts.idx].put, put_buf); + } + ts.idx++; + put_buf_cnt = 0; + get_buf_cnt = 0; + send_ack = 1; + } + return 0; +} + +static void init_simple_test(void) +{ + memset(&ts, 0, sizeof(ts)); + ts.run_test = run_simple_test; + ts.validate_put = validate_simple_test; +} + +static void run_plant_and_detach_test(int is_early) +{ + char before[BREAK_INSTR_SIZE]; + char after[BREAK_INSTR_SIZE]; + + probe_kernel_read(before, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + init_simple_test(); 
+ ts.tst = plant_and_detach_test; + ts.name = "plant_and_detach_test"; + /* Activate test with initial breakpoint */ + if (!is_early) + kgdb_breakpoint(); + probe_kernel_read(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + if (memcmp(before, after, BREAK_INSTR_SIZE)) { + printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); + panic("kgdb memory corruption"); + } + + /* complete the detach test */ + if (!is_early) + kgdbts_break_test(); +} + +static void run_breakpoint_test(int is_hw_breakpoint) +{ + test_complete = 0; + init_simple_test(); + if (is_hw_breakpoint) { + ts.tst = hw_breakpoint_test; + ts.name = "hw_breakpoint_test"; + } else { + ts.tst = sw_breakpoint_test; + ts.name = "sw_breakpoint_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + /* run code with the break point in it */ + kgdbts_break_test(); + kgdb_breakpoint(); + + if (test_complete) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + if (is_hw_breakpoint) + hwbreaks_ok = 0; +} + +static void run_hw_break_test(int is_write_test) +{ + test_complete = 0; + init_simple_test(); + if (is_write_test) { + ts.tst = hw_write_break_test; + ts.name = "hw_write_break_test"; + } else { + ts.tst = hw_access_break_test; + ts.name = "hw_access_break_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + hw_break_val_access(); + if (is_write_test) { + if (test_complete == 2) { + eprintk("kgdbts: ERROR %s broke on access\n", + ts.name); + hwbreaks_ok = 0; + } + hw_break_val_write(); + } + kgdb_breakpoint(); + + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + hwbreaks_ok = 0; +} + +static void run_nmi_sleep_test(int nmi_sleep) +{ + unsigned long flags; + + init_simple_test(); + ts.tst = nmi_sleep_test; + ts.name = "nmi_sleep_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + local_irq_save(flags); + mdelay(nmi_sleep*1000); + touch_nmi_watchdog(); + 
local_irq_restore(flags); + if (test_complete != 2) + eprintk("kgdbts: ERROR nmi_test did not hit nmi\n"); + kgdb_breakpoint(); + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); +} + +static void run_bad_read_test(void) +{ + init_simple_test(); + ts.tst = bad_read_test; + ts.name = "bad_read_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_do_fork_test(void) +{ + init_simple_test(); + ts.tst = do_fork_test; + ts.name = "do_fork_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_sys_open_test(void) +{ + init_simple_test(); + ts.tst = sys_open_test; + ts.name = "sys_open_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_singlestep_break_test(void) +{ + init_simple_test(); + ts.tst = singlestep_break_test; + ts.name = "singlestep_breakpoint_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + kgdbts_break_test(); + kgdbts_break_test(); +} + +static void kgdbts_run_tests(void) +{ + char *ptr; + int fork_test = 0; + int do_sys_open_test = 0; + int sstep_test = 1000; + int nmi_sleep = 0; + int i; + + ptr = strchr(config, 'F'); + if (ptr) + fork_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'S'); + if (ptr) + do_sys_open_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'N'); + if (ptr) + nmi_sleep = simple_strtol(ptr+1, NULL, 10); + ptr = strchr(config, 'I'); + if (ptr) + sstep_test = simple_strtol(ptr+1, NULL, 10); + + /* required internal KGDB tests */ + v1printk("kgdbts:RUN plant and detach test\n"); + run_plant_and_detach_test(0); + v1printk("kgdbts:RUN sw breakpoint test\n"); + run_breakpoint_test(0); + v1printk("kgdbts:RUN bad memory access test\n"); + run_bad_read_test(); + v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test); + for (i = 0; i < sstep_test; i++) { + run_singlestep_break_test(); + if (i % 100 == 0) + 
v1printk("kgdbts:RUN singlestep [%i/%i]\n", + i, sstep_test); + } + + /* ===Optional tests=== */ + + /* All HW break point tests */ + if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) { + hwbreaks_ok = 1; + v1printk("kgdbts:RUN hw breakpoint test\n"); + run_breakpoint_test(1); + v1printk("kgdbts:RUN hw write breakpoint test\n"); + run_hw_break_test(1); + v1printk("kgdbts:RUN access write breakpoint test\n"); + run_hw_break_test(0); + } + + if (nmi_sleep) { + v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep); + run_nmi_sleep_test(nmi_sleep); + } + +#ifdef CONFIG_DEBUG_RODATA + /* Until there is an api to write to read-only text segments, use + * HW breakpoints for the remainder of any tests, else print a + * failure message if hw breakpoints do not work. + */ + if (!(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT && hwbreaks_ok)) { + /* The two literals are concatenated; keep the separating space */ + eprintk("kgdbts: HW breakpoints do not work," + " skipping remaining tests\n"); + return; + } + force_hwbrks = 1; +#endif /* CONFIG_DEBUG_RODATA */ + + /* If the do_fork test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. + */ + if (fork_test) { + repeat_test = fork_test; + printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_do_fork_test(); + return; + } + + /* If the sys_open test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks.
+ */ + if (do_sys_open_test) { + repeat_test = do_sys_open_test; + printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_sys_open_test(); + return; + } + /* Shutdown and unregister */ + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; +} + +static int kgdbts_option_setup(char *opt) +{ + if (strlen(opt) > MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return -ENOSPC; + } + strcpy(config, opt); + + verbose = 0; + if (strstr(config, "V1")) + verbose = 1; + if (strstr(config, "V2")) + verbose = 2; + + return 0; +} + +__setup("kgdbts=", kgdbts_option_setup); + +static int configure_kgdbts(void) +{ + int err = 0; + + if (!strlen(config) || isspace(config[0])) + goto noconfig; + err = kgdbts_option_setup(config); + if (err) + goto noconfig; + + final_ack = 0; + run_plant_and_detach_test(1); + + err = kgdb_register_io_module(&kgdbts_io_ops); + if (err) { + configured = 0; + return err; + } + configured = 1; + kgdbts_run_tests(); + + return err; + +noconfig: + config[0] = 0; + configured = 0; + + return err; +} + +static int __init init_kgdbts(void) +{ + /* Already configured? 
*/ + if (configured == 1) + return 0; + + return configure_kgdbts(); +} + +static void cleanup_kgdbts(void) +{ + if (configured == 1) + kgdb_unregister_io_module(&kgdbts_io_ops); +} + +static int kgdbts_get_char(void) +{ + int val = 0; + + if (ts.run_test) + val = ts.run_test(1, 0); + + return val; +} + +static void kgdbts_put_char(u8 chr) +{ + if (ts.run_test) + ts.run_test(0, chr); +} + +/* + * Setter for the "kgdbts" module parameter. Stores kmessage in config and, + * unless the init function has not run yet (configured < 0), tears down any + * active configuration and re-runs it with the new string. + * + * Returns -ENOSPC when the string does not fit, -EBUSY while kgdb is + * connected, otherwise 0 or the result of configure_kgdbts(). + */ +static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp) +{ + int len = strlen(kmessage); + + if (len >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return -ENOSPC; + } + + /* Only copy in the string if the init function has not run yet */ + if (configured < 0) { + strcpy(config, kmessage); + return 0; + } + + if (kgdb_connected) { + printk(KERN_ERR + "kgdbts: Cannot reconfigure while KGDB is connected.\n"); + + return -EBUSY; + } + + strcpy(config, kmessage); + /* Chop out \n char as a result of echo. An empty write gives + * len == 0, so guard the index to avoid reading config[-1]. + */ + if (len > 0 && config[len - 1] == '\n') + config[len - 1] = '\0'; + + if (configured == 1) + cleanup_kgdbts(); + + /* Go and configure with the new params.
*/ + return configure_kgdbts(); +} + +static void kgdbts_pre_exp_handler(void) +{ + /* Increment the module count when the debugger is active */ + if (!kgdb_connected) + try_module_get(THIS_MODULE); +} + +static void kgdbts_post_exp_handler(void) +{ + /* decrement the module count when the debugger detaches */ + if (!kgdb_connected) + module_put(THIS_MODULE); +} + +static struct kgdb_io kgdbts_io_ops = { + .name = "kgdbts", + .read_char = kgdbts_get_char, + .write_char = kgdbts_put_char, + .pre_exception = kgdbts_pre_exp_handler, + .post_exception = kgdbts_post_exp_handler, +}; + +module_init(init_kgdbts); +module_exit(cleanup_kgdbts); +module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644); +MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]"); +MODULE_DESCRIPTION("KGDB Test Suite"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Wind River Systems, Inc."); + diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c new file mode 100644 index 00000000000..3648b23d5c9 --- /dev/null +++ b/drivers/misc/lkdtm.c @@ -0,0 +1,345 @@ +/* + * Kprobe module for testing crash dumps + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Ankita Garg <ankita@in.ibm.com> + * + * This module induces system failures at predefined crashpoints to + * evaluate the reliability of crash dumps obtained using different dumping + * solutions. + * + * It is adapted from the Linux Kernel Dump Test Tool by + * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net> + * + * Usage : insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<> + * [cpoint_count={>0}] + * + * recur_count : Recursion level for the stack overflow test. Default is 10. + * + * cpoint_name : Crash point where the kernel is to be crashed. It can be + * one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY, + * FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD, + * IDE_CORE_CP + * + * cpoint_type : Indicates the action to be taken on hitting the crash point. + * It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW + * + * cpoint_count : Indicates the number of times the crash point is to be hit + * to trigger an action. The default is 10. 
+ */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/buffer_head.h> +#include <linux/kprobes.h> +#include <linux/list.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/hrtimer.h> +#include <scsi/scsi_cmnd.h> + +#ifdef CONFIG_IDE +#include <linux/ide.h> +#endif + +#define NUM_CPOINTS 8 +#define NUM_CPOINT_TYPES 5 +#define DEFAULT_COUNT 10 +#define REC_NUM_DEFAULT 10 + +enum cname { + INVALID, + INT_HARDWARE_ENTRY, + INT_HW_IRQ_EN, + INT_TASKLET_ENTRY, + FS_DEVRW, + MEM_SWAPOUT, + TIMERADD, + SCSI_DISPATCH_CMD, + IDE_CORE_CP +}; + +enum ctype { + NONE, + PANIC, + BUG, + EXCEPTION, + LOOP, + OVERFLOW +}; + +static char* cp_name[] = { + "INT_HARDWARE_ENTRY", + "INT_HW_IRQ_EN", + "INT_TASKLET_ENTRY", + "FS_DEVRW", + "MEM_SWAPOUT", + "TIMERADD", + "SCSI_DISPATCH_CMD", + "IDE_CORE_CP" +}; + +static char* cp_type[] = { + "PANIC", + "BUG", + "EXCEPTION", + "LOOP", + "OVERFLOW" +}; + +static struct jprobe lkdtm; + +static int lkdtm_parse_commandline(void); +static void lkdtm_handler(void); + +static char* cpoint_name; +static char* cpoint_type; +static int cpoint_count = DEFAULT_COUNT; +static int recur_count = REC_NUM_DEFAULT; + +static enum cname cpoint = INVALID; +static enum ctype cptype = NONE; +static int count = DEFAULT_COUNT; + +module_param(recur_count, int, 0644); +MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\ + "default is 10"); +module_param(cpoint_name, charp, 0644); +MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed"); +module_param(cpoint_type, charp, 0644); +MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\ + "hitting the crash point"); +module_param(cpoint_count, int, 0644); +MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\ + "crash point is to be hit to trigger action"); + +static unsigned int jp_do_irq(unsigned int irq) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + 
+static irqreturn_t jp_handle_irq_event(unsigned int irq, + struct irqaction *action) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static void jp_tasklet_action(struct softirq_action *a) +{ + lkdtm_handler(); + jprobe_return(); +} + +static void jp_ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) +{ + lkdtm_handler(); + jprobe_return(); +} + +struct scan_control; + +static unsigned long jp_shrink_inactive_list(unsigned long max_scan, + struct zone *zone, + struct scan_control *sc) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static int jp_hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +#ifdef CONFIG_IDE +int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file, + struct block_device *bdev, unsigned int cmd, + unsigned long arg) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} +#endif + +static int lkdtm_parse_commandline(void) +{ + int i; + + if (cpoint_name == NULL || cpoint_type == NULL || + cpoint_count < 1 || recur_count < 1) + return -EINVAL; + + for (i = 0; i < NUM_CPOINTS; ++i) { + if (!strcmp(cpoint_name, cp_name[i])) { + cpoint = i + 1; + break; + } + } + + for (i = 0; i < NUM_CPOINT_TYPES; ++i) { + if (!strcmp(cpoint_type, cp_type[i])) { + cptype = i + 1; + break; + } + } + + if (cpoint == INVALID || cptype == NONE) + return -EINVAL; + + count = cpoint_count; + + return 0; +} + +static int recursive_loop(int a) +{ + char buf[1024]; + + memset(buf,0xFF,1024); + recur_count--; + if (!recur_count) + return 0; + else + return recursive_loop(a); +} + +void lkdtm_handler(void) +{ + printk(KERN_INFO "lkdtm : Crash point %s of type %s hit\n", + cpoint_name, cpoint_type); + --count; + + if (count == 0) { + switch (cptype) { + case NONE: + break; + case PANIC: + printk(KERN_INFO "lkdtm : PANIC\n"); + 
panic("dumptest"); + break; + case BUG: + printk(KERN_INFO "lkdtm : BUG\n"); + BUG(); + break; + case EXCEPTION: + printk(KERN_INFO "lkdtm : EXCEPTION\n"); + *((int *) 0) = 0; + break; + case LOOP: + printk(KERN_INFO "lkdtm : LOOP\n"); + for (;;); + break; + case OVERFLOW: + printk(KERN_INFO "lkdtm : OVERFLOW\n"); + (void) recursive_loop(0); + break; + default: + break; + } + count = cpoint_count; + } +} + +static int __init lkdtm_module_init(void) +{ + int ret; + + if (lkdtm_parse_commandline() == -EINVAL) { + printk(KERN_INFO "lkdtm : Invalid command\n"); + return -EINVAL; + } + + switch (cpoint) { + case INT_HARDWARE_ENTRY: + lkdtm.kp.symbol_name = "do_IRQ"; + lkdtm.entry = (kprobe_opcode_t*) jp_do_irq; + break; + case INT_HW_IRQ_EN: + lkdtm.kp.symbol_name = "handle_IRQ_event"; + lkdtm.entry = (kprobe_opcode_t*) jp_handle_irq_event; + break; + case INT_TASKLET_ENTRY: + lkdtm.kp.symbol_name = "tasklet_action"; + lkdtm.entry = (kprobe_opcode_t*) jp_tasklet_action; + break; + case FS_DEVRW: + lkdtm.kp.symbol_name = "ll_rw_block"; + lkdtm.entry = (kprobe_opcode_t*) jp_ll_rw_block; + break; + case MEM_SWAPOUT: + lkdtm.kp.symbol_name = "shrink_inactive_list"; + lkdtm.entry = (kprobe_opcode_t*) jp_shrink_inactive_list; + break; + case TIMERADD: + lkdtm.kp.symbol_name = "hrtimer_start"; + lkdtm.entry = (kprobe_opcode_t*) jp_hrtimer_start; + break; + case SCSI_DISPATCH_CMD: + lkdtm.kp.symbol_name = "scsi_dispatch_cmd"; + lkdtm.entry = (kprobe_opcode_t*) jp_scsi_dispatch_cmd; + break; + case IDE_CORE_CP: +#ifdef CONFIG_IDE + lkdtm.kp.symbol_name = "generic_ide_ioctl"; + lkdtm.entry = (kprobe_opcode_t*) jp_generic_ide_ioctl; +#else + printk(KERN_INFO "lkdtm : Crash point not available\n"); +#endif + break; + default: + printk(KERN_INFO "lkdtm : Invalid Crash Point\n"); + break; + } + + if ((ret = register_jprobe(&lkdtm)) < 0) { + printk(KERN_INFO "lkdtm : Couldn't register jprobe\n"); + return ret; + } + + printk(KERN_INFO "lkdtm : Crash point %s of type %s registered\n", 
+ cpoint_name, cpoint_type); + return 0; +} + +static void __exit lkdtm_module_exit(void) +{ + unregister_jprobe(&lkdtm); + printk(KERN_INFO "lkdtm : Crash point unregistered\n"); +} + +module_init(lkdtm_module_init); +module_exit(lkdtm_module_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c new file mode 100644 index 00000000000..04c27266f56 --- /dev/null +++ b/drivers/misc/phantom.c @@ -0,0 +1,569 @@ +/* + * Copyright (C) 2005-2007 Jiri Slaby <jirislaby@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * You need a userspace library to cooperate with this driver. It (and other + * info) may be obtained here: + * http://www.fi.muni.cz/~xslaby/phantom.html + * or alternatively, you might use OpenHaptics provided by Sensable. 
+ */ + +#include <linux/compat.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/interrupt.h> +#include <linux/cdev.h> +#include <linux/phantom.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> + +#include <asm/atomic.h> +#include <asm/io.h> + +#define PHANTOM_VERSION "n0.9.8" + +#define PHANTOM_MAX_MINORS 8 + +#define PHN_IRQCTL 0x4c /* irq control in caddr space */ + +#define PHB_RUNNING 1 +#define PHB_NOT_OH 2 + +static struct class *phantom_class; +static int phantom_major; + +struct phantom_device { + unsigned int opened; + void __iomem *caddr; + u32 __iomem *iaddr; + u32 __iomem *oaddr; + unsigned long status; + atomic_t counter; + + wait_queue_head_t wait; + struct cdev cdev; + + struct mutex open_lock; + spinlock_t regs_lock; + + /* used in NOT_OH mode */ + struct phm_regs oregs; + u32 ctl_reg; +}; + +static unsigned char phantom_devices[PHANTOM_MAX_MINORS]; + +static int phantom_status(struct phantom_device *dev, unsigned long newstat) +{ + pr_debug("phantom_status %lx %lx\n", dev->status, newstat); + + if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) { + atomic_set(&dev->counter, 0); + iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL); + iowrite32(0x43, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) { + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } + + dev->status = newstat; + + return 0; +} + +/* + * File ops + */ + +static long phantom_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct phantom_device *dev = file->private_data; + struct phm_regs rs; + struct phm_reg r; + void __user *argp = (void __user *)arg; + unsigned long flags; + unsigned int i; + + switch (cmd) { + case PHN_SETREG: + case PHN_SET_REG: + if (copy_from_user(&r, argp, 
sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + spin_lock_irqsave(&dev->regs_lock, flags); + if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) && + phantom_status(dev, dev->status | PHB_RUNNING)){ + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -ENODEV; + } + + pr_debug("phantom: writing %x to %u\n", r.value, r.reg); + + /* preserve amp bit (don't allow to change it when in NOT_OH) */ + if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) { + r.value &= ~PHN_CTL_AMP; + r.value |= dev->ctl_reg & PHN_CTL_AMP; + dev->ctl_reg = r.value; + } + + iowrite32(r.value, dev->iaddr + r.reg); + ioread32(dev->iaddr); /* PCI posting */ + + if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ)) + phantom_status(dev, dev->status & ~PHB_RUNNING); + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_SETREGS: + case PHN_SET_REGS: + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_NOT_OH) + memcpy(&dev->oregs, &rs, sizeof(rs)); + else { + u32 m = min(rs.count, 8U); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + iowrite32(rs.values[i], dev->oaddr + i); + ioread32(dev->iaddr); /* PCI posting */ + } + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_GETREG: + case PHN_GET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + r.value = ioread32(dev->iaddr + r.reg); + + if (copy_to_user(argp, &r, sizeof(r))) + return -EFAULT; + break; + case PHN_GETREGS: + case PHN_GET_REGS: { + u32 m; + + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + m = min(rs.count, 8U); + + pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + rs.values[i] = ioread32(dev->iaddr + i); + atomic_set(&dev->counter, 0); + 
spin_unlock_irqrestore(&dev->regs_lock, flags); + + if (copy_to_user(argp, &rs, sizeof(rs))) + return -EFAULT; + break; + } case PHN_NOT_OH: + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_RUNNING) { + printk(KERN_ERR "phantom: you need to set NOT_OH " + "before you start the device!\n"); + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -EINVAL; + } + dev->status |= PHB_NOT_OH; + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + default: + return -ENOTTY; + } + + return 0; +} + +#ifdef CONFIG_COMPAT +static long phantom_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT); + cmd |= sizeof(void *) << _IOC_SIZESHIFT; + } + return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define phantom_compat_ioctl NULL +#endif + +static int phantom_open(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = container_of(inode->i_cdev, + struct phantom_device, cdev); + + lock_kernel(); + nonseekable_open(inode, file); + + if (mutex_lock_interruptible(&dev->open_lock)) { + unlock_kernel(); + return -ERESTARTSYS; + } + + if (dev->opened) { + mutex_unlock(&dev->open_lock); + unlock_kernel(); + return -EINVAL; + } + + WARN_ON(dev->status & PHB_NOT_OH); + + file->private_data = dev; + + atomic_set(&dev->counter, 0); + dev->opened++; + mutex_unlock(&dev->open_lock); + unlock_kernel(); + return 0; +} + +static int phantom_release(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = file->private_data; + + mutex_lock(&dev->open_lock); + + dev->opened = 0; + phantom_status(dev, dev->status & ~PHB_RUNNING); + dev->status &= ~PHB_NOT_OH; + + mutex_unlock(&dev->open_lock); + + return 0; +} + +static unsigned int phantom_poll(struct file *file, poll_table *wait) +{ + struct phantom_device *dev = file->private_data; + unsigned int mask = 0; + + 
pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter)); + poll_wait(file, &dev->wait, wait); + + if (!(dev->status & PHB_RUNNING)) + mask = POLLERR; + else if (atomic_read(&dev->counter)) + mask = POLLIN | POLLRDNORM; + + pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter)); + + return mask; +} + +static const struct file_operations phantom_file_ops = { + .open = phantom_open, + .release = phantom_release, + .unlocked_ioctl = phantom_ioctl, + .compat_ioctl = phantom_compat_ioctl, + .poll = phantom_poll, +}; + +static irqreturn_t phantom_isr(int irq, void *data) +{ + struct phantom_device *dev = data; + unsigned int i; + u32 ctl; + + spin_lock(&dev->regs_lock); + ctl = ioread32(dev->iaddr + PHN_CONTROL); + if (!(ctl & PHN_CTL_IRQ)) { + spin_unlock(&dev->regs_lock); + return IRQ_NONE; + } + + iowrite32(0, dev->iaddr); + iowrite32(0xc0, dev->iaddr); + + if (dev->status & PHB_NOT_OH) { + struct phm_regs *r = &dev->oregs; + u32 m = min(r->count, 8U); + + for (i = 0; i < m; i++) + if (r->mask & BIT(i)) + iowrite32(r->values[i], dev->oaddr + i); + + dev->ctl_reg ^= PHN_CTL_AMP; + iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL); + } + spin_unlock(&dev->regs_lock); + + ioread32(dev->iaddr); /* PCI posting */ + + atomic_inc(&dev->counter); + wake_up_interruptible(&dev->wait); + + return IRQ_HANDLED; +} + +/* + * Init and deinit driver + */ + +static unsigned int __devinit phantom_get_free(void) +{ + unsigned int i; + + for (i = 0; i < PHANTOM_MAX_MINORS; i++) + if (phantom_devices[i] == 0) + break; + + return i; +} + +static int __devinit phantom_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct phantom_device *pht; + unsigned int minor; + int retval; + + retval = pci_enable_device(pdev); + if (retval) + goto err; + + minor = phantom_get_free(); + if (minor == PHANTOM_MAX_MINORS) { + dev_err(&pdev->dev, "too many devices found!\n"); + retval = -EIO; + goto err_dis; + } + + phantom_devices[minor] = 1; + + retval = 
pci_request_regions(pdev, "phantom"); + if (retval) + goto err_null; + + retval = -ENOMEM; + pht = kzalloc(sizeof(*pht), GFP_KERNEL); + if (pht == NULL) { + dev_err(&pdev->dev, "unable to allocate device\n"); + goto err_reg; + } + + pht->caddr = pci_iomap(pdev, 0, 0); + if (pht->caddr == NULL) { + dev_err(&pdev->dev, "can't remap conf space\n"); + goto err_fr; + } + pht->iaddr = pci_iomap(pdev, 2, 0); + if (pht->iaddr == NULL) { + dev_err(&pdev->dev, "can't remap input space\n"); + goto err_unmc; + } + pht->oaddr = pci_iomap(pdev, 3, 0); + if (pht->oaddr == NULL) { + dev_err(&pdev->dev, "can't remap output space\n"); + goto err_unmi; + } + + mutex_init(&pht->open_lock); + spin_lock_init(&pht->regs_lock); + init_waitqueue_head(&pht->wait); + cdev_init(&pht->cdev, &phantom_file_ops); + pht->cdev.owner = THIS_MODULE; + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + retval = request_irq(pdev->irq, phantom_isr, + IRQF_SHARED | IRQF_DISABLED, "phantom", pht); + if (retval) { + dev_err(&pdev->dev, "can't establish ISR\n"); + goto err_unmo; + } + + retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1); + if (retval) { + dev_err(&pdev->dev, "chardev registration failed\n"); + goto err_irq; + } + + if (IS_ERR(device_create(phantom_class, &pdev->dev, + MKDEV(phantom_major, minor), NULL, + "phantom%u", minor))) + dev_err(&pdev->dev, "can't create device\n"); + + pci_set_drvdata(pdev, pht); + + return 0; +err_irq: + free_irq(pdev->irq, pht); +err_unmo: + pci_iounmap(pdev, pht->oaddr); +err_unmi: + pci_iounmap(pdev, pht->iaddr); +err_unmc: + pci_iounmap(pdev, pht->caddr); +err_fr: + kfree(pht); +err_reg: + pci_release_regions(pdev); +err_null: + phantom_devices[minor] = 0; +err_dis: + pci_disable_device(pdev); +err: + return retval; +} + +static void __devexit phantom_remove(struct pci_dev *pdev) +{ + struct phantom_device *pht = pci_get_drvdata(pdev); + unsigned int minor = MINOR(pht->cdev.dev); + + 
device_destroy(phantom_class, MKDEV(phantom_major, minor)); + + cdev_del(&pht->cdev); + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + free_irq(pdev->irq, pht); + + pci_iounmap(pdev, pht->oaddr); + pci_iounmap(pdev, pht->iaddr); + pci_iounmap(pdev, pht->caddr); + + kfree(pht); + + pci_release_regions(pdev); + + phantom_devices[minor] = 0; + + pci_disable_device(pdev); +} + +#ifdef CONFIG_PM +static int phantom_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct phantom_device *dev = pci_get_drvdata(pdev); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + + synchronize_irq(pdev->irq); + + return 0; +} + +static int phantom_resume(struct pci_dev *pdev) +{ + struct phantom_device *dev = pci_get_drvdata(pdev); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + + return 0; +} +#else +#define phantom_suspend NULL +#define phantom_resume NULL +#endif + +static struct pci_device_id phantom_pci_tbl[] __devinitdata = { + { .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050, + .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050, + .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, phantom_pci_tbl); + +static struct pci_driver phantom_pci_driver = { + .name = "phantom", + .id_table = phantom_pci_tbl, + .probe = phantom_probe, + .remove = __devexit_p(phantom_remove), + .suspend = phantom_suspend, + .resume = phantom_resume +}; + +static ssize_t phantom_show_version(struct class *cls, char *buf) +{ + return sprintf(buf, PHANTOM_VERSION "\n"); +} + +static CLASS_ATTR(version, 0444, phantom_show_version, NULL); + +static int __init phantom_init(void) +{ + int retval; + dev_t dev; + + phantom_class = class_create(THIS_MODULE, "phantom"); + if (IS_ERR(phantom_class)) { + retval = PTR_ERR(phantom_class); + printk(KERN_ERR "phantom: can't register phantom class\n"); + goto err; + } + retval = 
class_create_file(phantom_class, &class_attr_version); + if (retval) { + printk(KERN_ERR "phantom: can't create sysfs version file\n"); + goto err_class; + } + + retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom"); + if (retval) { + printk(KERN_ERR "phantom: can't register character device\n"); + goto err_attr; + } + phantom_major = MAJOR(dev); + + retval = pci_register_driver(&phantom_pci_driver); + if (retval) { + printk(KERN_ERR "phantom: can't register pci driver\n"); + goto err_unchr; + } + + printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", " + "init OK\n"); + + return 0; +err_unchr: + unregister_chrdev_region(dev, PHANTOM_MAX_MINORS); +err_attr: + class_remove_file(phantom_class, &class_attr_version); +err_class: + class_destroy(phantom_class); +err: + return retval; +} + +static void __exit phantom_exit(void) +{ + pci_unregister_driver(&phantom_pci_driver); + + unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS); + + class_remove_file(phantom_class, &class_attr_version); + class_destroy(phantom_class); + + pr_debug("phantom: module successfully removed\n"); +} + +module_init(phantom_init); +module_exit(phantom_exit); + +MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>"); +MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(PHANTOM_VERSION); diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile new file mode 100644 index 00000000000..7c4c306dfa8 --- /dev/null +++ b/drivers/misc/sgi-gru/Makefile @@ -0,0 +1,7 @@ +ifdef CONFIG_SGI_GRU_DEBUG + EXTRA_CFLAGS += -DDEBUG +endif + +obj-$(CONFIG_SGI_GRU) := gru.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o + diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h new file mode 100644 index 00000000000..3ad76cd18b4 --- /dev/null +++ b/drivers/misc/sgi-gru/gru.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2008 Silicon Graphics, 
/*
 * GRU architectural definitions
 *
 * GRU_HANDLE_STRIDE and GRU_CB_BASE are used by gru_get_cb_pointer() to
 * locate a control block inside a GSeg; GRU_DS_BASE and
 * GRU_CACHE_LINE_BYTES are used the same way for data segment lines.
 */
#define GRU_CACHE_LINE_BYTES		64
#define GRU_HANDLE_STRIDE		256
#define GRU_CB_BASE			0
#define GRU_DS_BASE			0x20000

/*
 * Size used to map GRU GSeg
 *
 * Both supported architectures currently use the same 256KB value; any
 * other architecture is rejected at compile time.
 */
#if defined(CONFIG_IA64)
#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
#elif defined(CONFIG_X86_64)
#define GRU_GSEG_PAGESIZE	(256 * 1024UL)		/* ZZZ 2MB ??? */
#else
#error "Unsupported architecture"
#endif

/*
 * Structure for obtaining GRU resource information
 *
 * NOTE(review): field meanings below are inferred from the names only --
 * totals vs. user-available vs. currently-free DSR bytes and CBRs for one
 * chiplet; confirm against the code that fills this in.
 */
struct gru_chiplet_info {
	int	node;
	int	chiplet;
	int	blade;
	int	total_dsr_bytes;
	int	total_cbr;
	int	total_user_dsr_bytes;
	int	total_user_cbr;
	int	free_user_dsr_bytes;
	int	free_user_cbr;
};
/*
 * Statistics kept for each context.
 *
 * Four counters followed by ten reserved words (14 unsigned longs total).
 */
struct gru_gseg_statistics {
	unsigned long	fmm_tlbmiss;
	unsigned long	upm_tlbmiss;
	unsigned long	tlbdropin;
	unsigned long	context_stolen;
	unsigned long	reserved[10];
};

/* Flags for GRU options on the gru_create_context() call */
/* Select one of the following 4 options to specify how TLB misses are handled */
#define GRU_OPT_MISS_DEFAULT	0x0000	/* Use default mode */
#define GRU_OPT_MISS_USER_POLL	0x0001	/* User will poll CB for faults */
#define GRU_OPT_MISS_FMM_INTR	0x0002	/* Send interrupt to cpu to
					   handle fault */
#define GRU_OPT_MISS_FMM_POLL	0x0003	/* Use system polling thread */
#define GRU_OPT_MISS_MASK	0x0003	/* Mask for TLB MISS option */
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_INSTRUCTIONS_H__ +#define __GRU_INSTRUCTIONS_H__ + +extern int gru_check_status_proc(void *cb); +extern int gru_wait_proc(void *cb); +extern void gru_wait_abort_proc(void *cb); + + + +/* + * Architecture dependent functions + */ + +#if defined(CONFIG_IA64) +#include <linux/compiler.h> +#include <asm/intrinsics.h> +#define __flush_cache(p) ia64_fc((unsigned long)p) +/* Use volatile on IA64 to ensure ordering via st4.rel */ +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ + } while (0) +#elif defined(CONFIG_X86_64) +#define __flush_cache(p) clflush(p) +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *(unsigned long *)p = v; \ + } while (0) +#else +#error "Unsupported architecture" +#endif + +/* + * Control block status and exception codes + */ +#define CBS_IDLE 0 +#define CBS_EXCEPTION 1 +#define CBS_ACTIVE 2 +#define CBS_CALL_OS 3 + +/* CB substatus bitmasks */ +#define CBSS_MSG_QUEUE_MASK 7 +#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 + +/* CB substatus message queue values (low 3 bits of substatus) */ +#define CBSS_NO_ERROR 0 +#define CBSS_LB_OVERFLOWED 1 +#define CBSS_QLIMIT_REACHED 2 +#define CBSS_PAGE_OVERFLOW 3 +#define CBSS_AMO_NACKED 4 +#define CBSS_PUT_NACKED 5 + +/* + * Structure used to fetch exception detail for CBs that terminate with + * CBS_EXCEPTION + */ +struct control_block_extended_exc_detail { + unsigned long cb; + int opc; + int ecause; + int exopc; + long exceptdet0; + int exceptdet1; + int cbrstate; + int cbrexecstatus; +}; + +/* + * Instruction formats + */ + +/* + * Generic instruction format. + * This definition has precise bit field definitions. 
/*
 * Generic instruction format.
 * This definition has precise bit field definitions.
 *
 * The first 64 bits are split into bit fields; the GRU_CB_*_SHFT values
 * defined after the structures give the same positions as shift counts
 * (xtype at bit 8, iaa0 at 11, iaa1 at 13, opc at 16, exopc at 24,
 * idef2 at 32).
 */
struct gru_instruction_bits {
    /* DW 0  - low */
    unsigned int		icmd:      1;
    unsigned char		ima:	   3;	/* CB_DelRep, unmapped mode */
    unsigned char		reserved0: 4;
    unsigned int		xtype:     3;
    unsigned int		iaa0:      2;
    unsigned int		iaa1:      2;
    unsigned char		reserved1: 1;
    unsigned char		opc:       8;	/* opcode */
    unsigned char		exopc:     8;	/* extended opcode */
    /* DW 0  - high */
    unsigned int		idef2:    22;	/* TRi0 */
    unsigned char		reserved2: 2;
    unsigned char		istatus:   2;
    unsigned char		isubstatus:4;
    unsigned char		reserved3: 1;
    unsigned char		tlb_fault_color: 1;
    /* DW 1 */
    unsigned long		idef4;		/* 42 bits: TRi1, BufSize */
    /* DW 2-6 */
    unsigned long		idef1;		/* BAddr0 */
    unsigned long		idef5;		/* Nelem */
    unsigned long		idef6;		/* Stride, Operand1 */
    unsigned long		idef3;		/* BAddr1, Value, Operand2 */
    unsigned long		reserved4;
    /* DW 7 */
    unsigned long		avalue;		 /* AValue */
};

/*
 * Generic instruction with friendlier names. This format is used
 * for inline instructions.
 *
 * Eight 64-bit words; word 0 can be accessed whole (op64) or as two
 * 32-bit halves (op32 / tri0).
 */
struct gru_instruction {
    /* DW 0 */
    union {
    	unsigned long	op64;    /* icmd,xtype,iaa0,ima,opc,tri0 */
	struct {
		unsigned int	op32;
		unsigned int	tri0;
	};
    };
    unsigned long		tri1_bufsize;		/* DW 1 */
    unsigned long		baddr0;			/* DW 2 */
    unsigned long		nelem;			/* DW 3 */
    unsigned long		op1_stride;		/* DW 4 */
    unsigned long		op2_value_baddr1;	/* DW 5 */
    unsigned long		reserved0;		/* DW 6 */
    unsigned long		avalue;			/* DW 7 */
};

/*
 * Some shifts and masks for the low 64 bits of a GRU command
 *
 * NOTE(review): GRU_CB_IMA_MASK (0x3) is narrower than the 3-bit ima field
 * and GRU_IDEF2_MASK (0x3ffff, 18 bits) is narrower than the 22-bit idef2
 * field declared in struct gru_instruction_bits -- confirm whether that is
 * intentional before relying on these masks.
 */
#define GRU_CB_ICMD_SHFT	0
#define GRU_CB_ICMD_MASK	0x1
#define GRU_CB_XTYPE_SHFT	8
#define GRU_CB_XTYPE_MASK	0x7
#define GRU_CB_IAA0_SHFT	11
#define GRU_CB_IAA0_MASK	0x3
#define GRU_CB_IAA1_SHFT	13
#define GRU_CB_IAA1_MASK	0x3
#define GRU_CB_IMA_SHFT		1
#define GRU_CB_IMA_MASK		0x3
#define GRU_CB_OPC_SHFT		16
#define GRU_CB_OPC_MASK		0xff
#define GRU_CB_EXOPC_SHFT	24
#define GRU_CB_EXOPC_MASK	0xff
#define GRU_IDEF2_SHFT		32
#define GRU_IDEF2_MASK		0x3ffff
/* Position/width of the istatus field within the first 64-bit word */
#define GRU_ISTATUS_SHFT	56
#define GRU_ISTATUS_MASK	0x3

/* GRU instruction opcodes (opc field) */
#define OP_NOP		0x00
#define OP_BCOPY	0x01
#define OP_VLOAD	0x02
#define OP_IVLOAD	0x03
#define OP_VSTORE	0x04
#define OP_IVSTORE	0x05
#define OP_VSET		0x06
#define OP_IVSET	0x07
#define OP_MESQ		0x08
#define OP_GAMXR	0x09
#define OP_GAMIR	0x0a
#define OP_GAMIRR	0x0b
#define OP_GAMER	0x0c
#define OP_GAMERR	0x0d
#define OP_BSTORE	0x0e
#define OP_VFLUSH	0x0f


/* Extended opcodes values (exopc field) */

/* GAMIR - AMOs with implicit operands */
#define EOP_IR_FETCH	0x01 /* Plain fetch of memory */
#define EOP_IR_CLR	0x02 /* Fetch and clear */
#define EOP_IR_INC	0x05 /* Fetch and increment */
#define EOP_IR_DEC	0x07 /* Fetch and decrement */
#define EOP_IR_QCHK1	0x0d /* Queue check, 64 byte msg */
#define EOP_IR_QCHK2	0x0e /* Queue check, 128 byte msg */

/* GAMIRR - Registered AMOs with implicit operands */
#define EOP_IRR_FETCH	0x01 /* Registered fetch of memory */
#define EOP_IRR_CLR	0x02 /* Registered fetch and clear */
#define EOP_IRR_INC	0x05 /* Registered fetch and increment */
#define EOP_IRR_DEC	0x07 /* Registered fetch and decrement */
#define EOP_IRR_DECZ	0x0f /* Registered fetch and decrement, update on zero*/

/* GAMER - AMOs with explicit operands */
#define EOP_ER_SWAP	0x00 /* Exchange argument and memory */
#define EOP_ER_OR	0x01 /* Logical OR with memory */
#define EOP_ER_AND	0x02 /* Logical AND with memory */
#define EOP_ER_XOR	0x03 /* Logical XOR with memory */
#define EOP_ER_ADD	0x04 /* Add value to memory */
#define EOP_ER_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
#define EOP_ER_CADD	0x0c /* Queue check, operand1*64 byte msg */

/* GAMERR - Registered AMOs with explicit operands */
#define EOP_ERR_SWAP	0x00 /* Exchange argument and memory */
#define EOP_ERR_OR	0x01 /* Logical OR with memory */
#define EOP_ERR_AND	0x02 /* Logical AND with memory */
#define EOP_ERR_XOR	0x03 /* Logical XOR with memory */
#define EOP_ERR_ADD	0x04 /* Add value to memory */
#define EOP_ERR_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
#define EOP_ERR_EPOLL	0x09 /* Poll for equality */
#define EOP_ERR_NPOLL	0x0a /* Poll for inequality */

/* GAMXR - SGI Arithmetic unit */
#define EOP_XR_CSWAP	0x0b /* Masked compare exchange */


/* Transfer types (xtype field) */
#define XTYPE_B		0x0	/* byte */
#define XTYPE_S		0x1	/* short (2-byte) */
#define XTYPE_W		0x2	/* word (4-byte) */
#define XTYPE_DW	0x3	/* doubleword (8-byte) */
#define XTYPE_CL	0x6	/* cacheline (64-byte) */


/* Instruction access attributes (iaa0, iaa1 fields) */
#define IAA_RAM		0x0	/* normal cached RAM access */
#define IAA_NCRAM	0x2	/* noncoherent RAM access */
#define IAA_MMIO	0x1	/* noncoherent memory-mapped I/O space */
#define IAA_REGISTER	0x3	/* memory-mapped registers, etc. */


/* Instruction mode attributes (ima field) */
#define IMA_MAPPED	0x0	/* Virtual mode  */
#define IMA_CB_DELAY	0x1	/* hold read responses until status changes */
#define IMA_UNMAPPED	0x2	/* bypass the TLBs (OS only) */
#define IMA_INTERRUPT	0x4	/* Interrupt when instruction completes */

/* CBE ecause bits */
#define CBE_CAUSE_RI				(1 << 0)
#define CBE_CAUSE_INVALID_INSTRUCTION		(1 << 1)
#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN	(1 << 2)
#define CBE_CAUSE_PE_CHECK_DATA_ERROR		(1 << 3)
#define CBE_CAUSE_IAA_GAA_MISMATCH		(1 << 4)
#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION	(1 << 5)
#define CBE_CAUSE_OS_FATAL_TLB_FAULT		(1 << 6)
#define CBE_CAUSE_EXECUTION_HW_ERROR		(1 << 7)
#define CBE_CAUSE_TLBHW_ERROR			(1 << 8)
#define CBE_CAUSE_RA_REQUEST_TIMEOUT		(1 << 9)
#define CBE_CAUSE_HA_REQUEST_TIMEOUT		(1 << 10)
#define CBE_CAUSE_RA_RESPONSE_FATAL		(1 << 11)
#define CBE_CAUSE_RA_RESPONSE_NON_FATAL		(1 << 12)
#define CBE_CAUSE_HA_RESPONSE_FATAL		(1 << 13)
#define CBE_CAUSE_HA_RESPONSE_NON_FATAL		(1 << 14)
#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR	(1 << 15)
CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) +#define CBE_CAUSE_FORCED_ERROR (1 << 19) + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLB_INVAL_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 +#define CBR_EXS_CB_INT_PENDING_BIT 6 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) +#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT) + +/* + * Exceptions are retried for the following cases. If any OTHER bits are set + * in ecause, the exception is not retryable. + */ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ + CBE_CAUSE_TLBHW_ERROR | \ + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) + +/* Message queue head structure */ +union gru_mesqhead { + unsigned long val; + struct { + unsigned int head; + unsigned int limit; + }; +}; + + +/* Generate the low word of a GRU instruction */ +static inline unsigned long +__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype, + unsigned char iaa0, unsigned char iaa1, + unsigned long idef2, unsigned char ima) +{ + return (1 << GRU_CB_ICMD_SHFT) | + ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) | + (idef2<< GRU_IDEF2_SHFT) | + (iaa0 << GRU_CB_IAA0_SHFT) | + (iaa1 << GRU_CB_IAA1_SHFT) | + (ima << GRU_CB_IMA_SHFT) | + (xtype << GRU_CB_XTYPE_SHFT) | + (opcode << GRU_CB_OPC_SHFT) | + (exopc << GRU_CB_EXOPC_SHFT); +} + +/* + * Architecture specific 
intrinsics + */ +static inline void gru_flush_cache(void *p) +{ + __flush_cache(p); +} + +/* + * Store the lower 64 bits of the command including the "start" bit. Then + * start the instruction executing. + */ +static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64) +{ + gru_ordered_store_ulong(ins, op64); + mb(); + gru_flush_cache(ins); +} + + +/* Convert "hints" to IMA */ +#define CB_IMA(h) ((h) | IMA_UNMAPPED) + +/* Convert data segment cache line index into TRI0 / TRI1 value */ +#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) + +/* Inline functions for GRU instructions. + * Note: + * - nelem and stride are in elements + * - tri0/tri1 is in bytes for the beginning of the data segment. + */ +static inline void gru_vload_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void 
gru_vstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, + unsigned char xtype, unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_vset(void *cb, unsigned long mem_addr, + unsigned long value, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_ivset(void *cb, unsigned long mem_addr, + unsigned int tri1, unsigned long value, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, 
__opdword(OP_IVSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_vflush(void *cb, unsigned long mem_addr, + unsigned long nelem, unsigned char xtype, unsigned long stride, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op1_stride = stride; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_nop(void *cb, int hints) +{ + struct gru_instruction *ins = (void *)cb; + + gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints))); +} + + +static inline void gru_bcopy(void *cb, const unsigned long src, + unsigned long dest, + unsigned int tri0, unsigned int xtype, unsigned long nelem, + unsigned int bufsize, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri1_bufsize = bufsize; + gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, tri0, CB_IMA(hints))); +} + +static inline void gru_bstore(void *cb, const unsigned long src, + unsigned long dest, unsigned int tri0, unsigned int xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + tri0, CB_IMA(hints))); +} + +static inline void gru_gamir(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamirr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + 
gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamer(void *cb, int exopc, unsigned long src, + unsigned int xtype, + unsigned long operand1, unsigned long operand2, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamerr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long operand1, + unsigned long operand2, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamxr(void *cb, unsigned long src, + unsigned int tri0, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->nelem = 4; + gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, 0, CB_IMA(hints))); +} + +static inline void gru_mesq(void *cb, unsigned long queue, + unsigned long tri0, unsigned long nelem, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)queue; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline unsigned long gru_get_amo_value(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue; +} + +static inline int gru_get_amo_value_head(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue & 0xffffffff; +} + +static inline int gru_get_amo_value_limit(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue >> 32; +} + +static inline union gru_mesqhead 
gru_mesq_head(int head, int limit) +{ + union gru_mesqhead mqh; + + mqh.head = head; + mqh.limit = limit; + return mqh; +} + +/* + * Get struct control_block_extended_exc_detail for CB. + */ +extern int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet); + +#define GRU_EXC_STR_SIZE 256 + + +/* + * Control block definition for checking status + */ +struct gru_control_block_status { + unsigned int icmd :1; + unsigned int ima :3; + unsigned int reserved0 :4; + unsigned int unused1 :24; + unsigned int unused2 :24; + unsigned int istatus :2; + unsigned int isubstatus :4; + unsigned int unused3 :2; +}; + +/* Get CB status */ +static inline int gru_get_cb_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->istatus; +} + +/* Get CB message queue substatus */ +static inline int gru_get_cb_message_queue_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; +} + +/* Get CB substatus */ +static inline int gru_get_cb_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus; +} + +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. + * + */ +static inline int gru_check_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret; + + ret = cbs->istatus; + if (ret != CBS_ACTIVE) + ret = gru_check_status_proc(cb); + return ret; +} + +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION is returned + * to the user. Exception due to hardware errors are automatically + * retried before returning an exception. + * + */ +static inline int gru_wait(void *cb) +{ + return gru_wait_proc(cb); +} + +/* + * Wait for CB to complete. Aborts program if error. 
(Note: error does NOT + * mean TLB mis - only fatal errors such as memory parity error or user + * bugs will cause termination. + */ +static inline void gru_wait_abort(void *cb) +{ + gru_wait_abort_proc(cb); +} + +/* + * Get a pointer to the start of a gseg + * p - Any valid pointer within the gseg + */ +static inline void *gru_get_gseg_pointer (void *p) +{ + return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1)); +} + +/* + * Get a pointer to a control block + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired CB + */ +static inline void *gru_get_cb_pointer(void *gseg, + int index) +{ + return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE; +} + +/* + * Get a pointer to a cacheline in the data segment portion of a GSeg + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired cache line + */ +static inline void *gru_get_data_pointer(void *gseg, int index) +{ + return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES; +} + +/* + * Convert a vaddr into the tri index within the GSEG + * vaddr - virtual address of within gseg + */ +static inline int gru_get_tri(void *vaddr) +{ + return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE; +} +#endif /* __GRU_INSTRUCTIONS_H__ */ diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c new file mode 100644 index 00000000000..38657cdaf54 --- /dev/null +++ b/drivers/misc/sgi-gru/grufault.c @@ -0,0 +1,901 @@ +/* + * SN Platform GRU Driver + * + * FAULT HANDLER FOR GRU DETECTED TLB MISSES + * + * This file contains code that handles TLB misses within the GRU. + * These misses are reported either via interrupts or user polling of + * the user CB. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/security.h> +#include <asm/pgtable.h> +#include "gru.h" +#include "grutables.h" +#include "grulib.h" +#include "gru_instructions.h" +#include <asm/uv/uv_hub.h> + +/* Return codes for vtop functions */ +#define VTOP_SUCCESS 0 +#define VTOP_INVALID -1 +#define VTOP_RETRY -2 + + +/* + * Test if a physical address is a valid GRU GSEG address + */ +static inline int is_gru_paddr(unsigned long paddr) +{ + return paddr >= gru_start_paddr && paddr < gru_end_paddr; +} + +/* + * Find the vma of a GRU segment. Caller must hold mmap_sem. + */ +struct vm_area_struct *gru_find_vma(unsigned long vaddr) +{ + struct vm_area_struct *vma; + + vma = find_vma(current->mm, vaddr); + if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops) + return vma; + return NULL; +} + +/* + * Find and lock the gts that contains the specified user vaddr. + * + * Returns: + * - *gts with the mmap_sem locked for read and the GTS locked. + * - NULL if vaddr invalid OR is not a valid GSEG vaddr. 
+ */ + +static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = NULL; + + down_read(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (vma) + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (gts) + mutex_lock(>s->ts_ctxlock); + else + up_read(&mm->mmap_sem); + return gts; +} + +static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = ERR_PTR(-EINVAL); + + down_write(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (!vma) + goto err; + + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (IS_ERR(gts)) + goto err; + mutex_lock(>s->ts_ctxlock); + downgrade_write(&mm->mmap_sem); + return gts; + +err: + up_write(&mm->mmap_sem); + return gts; +} + +/* + * Unlock a GTS that was previously locked with gru_find_lock_gts(). + */ +static void gru_unlock_gts(struct gru_thread_state *gts) +{ + mutex_unlock(>s->ts_ctxlock); + up_read(¤t->mm->mmap_sem); +} + +/* + * Set a CB.istatus to active using a user virtual address. This must be done + * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. + * If the line is evicted, the status may be lost. The in-cache update + * is necessary to prevent the user from seeing a stale cb.istatus that will + * change as soon as the TFH restart is complete. Races may cause an + * occasional failure to clear the cb.istatus, but that is ok. + */ +static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) +{ + if (cbk) { + cbk->istatus = CBS_ACTIVE; + } +} + +/* + * Read & clear a TFM + * + * The GRU has an array of fault maps. A map is private to a cpu + * Only one cpu will be accessing a cpu's fault map. + * + * This function scans the cpu-private fault map & clears all bits that + * are set. 
/*
 * Read & clear a TFM
 *
 * The GRU has an array of fault maps. A map is private to a cpu
 * Only one cpu will be accessing a cpu's fault map.
 *
 * This function scans the cpu-private fault map & clears all bits that
 * are set. The function returns a bitmap that indicates the bits that
 * were cleared. Note that since the maps may be updated asynchronously by
 * the GRU, atomic operations must be used to clear bits.
 *
 * On return, imap->fault_bits holds the bits taken from the map's
 * fault_bits and dmap->fault_bits holds the bits taken from its done_bits.
 */
static void get_clear_fault_map(struct gru_state *gru,
				struct gru_tlb_fault_map *imap,
				struct gru_tlb_fault_map *dmap)
{
	unsigned long i, k;
	struct gru_tlb_fault_map *tfm;

	tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
	prefetchw(tfm);		/* Helps on hardware, required for emulator */
	for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
		/* Plain read first; only pay for the atomic xchg if non-zero. */
		k = tfm->fault_bits[i];
		if (k)
			k = xchg(&tfm->fault_bits[i], 0UL);
		imap->fault_bits[i] = k;
		k = tfm->done_bits[i];
		if (k)
			k = xchg(&tfm->done_bits[i], 0UL);
		/* done bits are reported through dmap's fault_bits array */
		dmap->fault_bits[i] = k;
	}

	/*
	 * Not functionally required but helps performance. (Required
	 * on emulator)
	 */
	gru_flush_cache(tfm);
}

/*
 * Atomic (interrupt context) & non-atomic (user context) functions to
 * convert a vaddr into a physical address. The size of the page
 * is returned in pageshift.
 * 	returns:
 * 		  0 - successful
 * 		< 0 - error code
 * 		  1 - (atomic only) try again in non-atomic context
 *
 * This non-atomic variant resolves the page through get_user_pages() and
 * returns -EFAULT if that does not yield a page.  The page reference is
 * dropped again before returning.
 */
static int non_atomic_pte_lookup(struct vm_area_struct *vma,
				 unsigned long vaddr, int write,
				 unsigned long *paddr, int *pageshift)
{
	struct page *page;

#ifdef CONFIG_HUGETLB_PAGE
	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
#else
	*pageshift = PAGE_SHIFT;
#endif
	if (get_user_pages
	    (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
		return -EFAULT;
	*paddr = page_to_phys(page);
	put_page(page);
	return 0;
}
/*
 * atomic_pte_lookup
 *
 * Convert a user virtual address to a physical address
 * Only supports Intel large pages (2MB only) on x86_64.
 *	ZZZ - hugepage support is incomplete
 *
 * NOTE: mmap_sem is already held on entry to this function. This
 * guarantees existence of the page tables.
 *
 * Returns 0 with *paddr / *pageshift filled in, or 1 when the lookup has to
 * be retried non-atomically: a page-table level is missing, the pte is not
 * present, or a write is requested and the pte is not both writable and
 * dirty.
 */
static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
	int write, unsigned long *paddr, int *pageshift)
{
	pgd_t *pgdp;
	pmd_t *pmdp;
	pud_t *pudp;
	pte_t pte;

	pgdp = pgd_offset(vma->vm_mm, vaddr);
	if (unlikely(pgd_none(*pgdp)))
		goto err;

	pudp = pud_offset(pgdp, vaddr);
	if (unlikely(pud_none(*pudp)))
		goto err;

	pmdp = pmd_offset(pudp, vaddr);
	if (unlikely(pmd_none(*pmdp)))
		goto err;
#ifdef CONFIG_X86_64
	/* A large pmd is itself the leaf entry; read it as a pte. */
	if (unlikely(pmd_large(*pmdp)))
		pte = *(pte_t *) pmdp;
	else
#endif
		pte = *pte_offset_kernel(pmdp, vaddr);

	if (unlikely(!pte_present(pte) ||
		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
		return 1;

	*paddr = pte_pfn(pte) << PAGE_SHIFT;
#ifdef CONFIG_HUGETLB_PAGE
	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
#else
	*pageshift = PAGE_SHIFT;
#endif
	return 0;

err:
	return 1;
}

/*
 * Translate a user virtual address of the gts's mm into a GRU global
 * physical address.
 *
 * The atomic lookup is always tried first.  If it fails, atomic callers get
 * VTOP_RETRY; non-atomic callers fall back to non_atomic_pte_lookup().
 *
 * Returns:
 *	VTOP_SUCCESS	- *gpa and *pageshift are valid
 *	VTOP_INVALID	- no vma found, the fallback lookup failed, or the
 *			  address resolves to GRU space itself
 *	VTOP_RETRY	- (atomic only) retry from non-atomic context
 */
static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
		    int write, int atomic, unsigned long *gpa, int *pageshift)
{
	struct mm_struct *mm = gts->ts_mm;
	struct vm_area_struct *vma;
	unsigned long paddr;
	int ret, ps;

	vma = find_vma(mm, vaddr);
	if (!vma)
		goto inval;

	/*
	 * Atomic lookup is faster & usually works even if called in non-atomic
	 * context.
	 */
	rmb();	/* Must check ms_range_active before loading PTEs */
	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
	if (ret) {
		if (atomic)
			goto upm;
		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
			goto inval;
	}
	if (is_gru_paddr(paddr))
		goto inval;
	/* Align down to the start of the (possibly huge) page. */
	paddr = paddr & ~((1UL << ps) - 1);
	*gpa = uv_soc_phys_ram_to_gpa(paddr);
	*pageshift = ps;
	return VTOP_SUCCESS;

inval:
	return VTOP_INVALID;
upm:
	return VTOP_RETRY;
}
 */
static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
{
	/* A NULL cbe is accepted and ignored. */
	if (unlikely(cbe)) {
		cbe->cbrexecstatus = 0;         /* make CL dirty */
		gru_flush_cache(cbe);
	}
}

/*
 * Preload the TLB with entries that may be required. Currently, preloading
 * is implemented only for BCOPY. Preload  <tlb_preload_count> pages OR to
 * the end of the bcopy transfer, whichever is smaller.
 *
 * Pages are loaded from the highest address downwards, stopping above the
 * page that holds fault_vaddr. The loop ends early on the first failing
 * gru_vtop() or tfh_write_only().
 */
static void gru_preload_tlb(struct gru_state *gru,
			struct gru_thread_state *gts, int atomic,
			unsigned long fault_vaddr, int asid, int write,
			unsigned char tlb_preload_count,
			struct gru_tlb_fault_handle *tfh,
			struct gru_control_block_extended *cbe)
{
	unsigned long vaddr = 0, gpa;
	int ret, pageshift;

	if (cbe->opccpy != OP_BCOPY)
		return;

	/*
	 * Compute the last byte of the buffer that starts at the faulting
	 * address. If the fault matches neither base address, vaddr stays 0
	 * and the loop below does not run.
	 */
	if (fault_vaddr == cbe->cbe_baddr0)
		vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
	else if (fault_vaddr == cbe->cbe_baddr1)
		vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;

	fault_vaddr &= PAGE_MASK;
	vaddr &= PAGE_MASK;
	vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);

	while (vaddr > fault_vaddr) {
		ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
		if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
					  GRU_PAGESIZE(pageshift)))
			return;
		gru_dbg(grudev,
			"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
			atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
			vaddr, asid, write, pageshift, gpa);
		vaddr -= PAGE_SIZE;
		STAT(tlb_preload_page);
	}
}

/*
 * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
 *	Input:
 *		cb    Address of user CBR.
Null if not running in user context + * Return: + * 0 = dropin, exception, or switch to UPM successful + * 1 = range invalidate active + * < 0 = error code + * + */ +static int gru_try_dropin(struct gru_state *gru, + struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + struct gru_instruction_bits *cbk) +{ + struct gru_control_block_extended *cbe = NULL; + unsigned char tlb_preload_count = gts->ts_tlb_preload_count; + int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; + unsigned long gpa = 0, vaddr = 0; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between + * TLB invalidates and TLB dropins. If an invalidate occurs + * in the window between reading the TFH and the subsequent TLB dropin, + * the dropin is ignored. This eliminates the need for additional locks. + */ + + /* + * Prefetch the CBE if doing TLB preloading + */ + if (unlikely(tlb_preload_count)) { + cbe = gru_tfh_to_cbe(tfh); + prefetchw(cbe); + } + + /* + * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. + * Might be a hardware race OR a stupid user. Ignore FMM because FMM + * is a transient state. + */ + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + sync_core(); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } + if (tfh->state == TFHSTATE_IDLE) + goto failidle; + if (tfh->state == TFHSTATE_MISS_FMM && cbk) + goto failfmm; + + write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; + vaddr = tfh->missvaddr; + asid = tfh->missasid; + indexway = tfh->indexway; + if (asid == 0) + goto failnoasid; + + rmb(); /* TFH must be cache resident before reading ms_range_active */ + + /* + * TFH is cache resident - at least briefly. Fail the dropin + * if a range invalidate is active. 
+ */ + if (atomic_read(>s->ts_gms->ms_range_active)) + goto failactive; + + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret == VTOP_INVALID) + goto failinval; + if (ret == VTOP_RETRY) + goto failupm; + + if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { + gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); + if (atomic || !gru_update_cch(gts)) { + gts->ts_force_cch_reload = 1; + goto failupm; + } + } + + if (unlikely(cbe) && pageshift == PAGE_SHIFT) { + gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); + gru_flush_cache_cbe(cbe); + } + + gru_cb_set_istatus_active(cbk); + gts->ustats.tlbdropin++; + tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift)); + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," + " rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, + indexway, write, pageshift, gpa); + STAT(tlb_dropin); + return 0; + +failnoasid: + /* No asid (delayed unload). 
*/ + STAT(tlb_dropin_fail_no_asid); + gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + return -EAGAIN; + +failupm: + /* Atomic failure switch CBR to UPM */ + tfh_user_polling_mode(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_upm); + gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return 1; + +failfmm: + /* FMM state on UPM call */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_fmm); + gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failnoexception: + /* TFH status did not show exception pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_no_exception); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", + tfh, tfh->status, tfh->state); + return 0; + +failidle: + /* TFH state was idle - no miss pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_idle); + gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failinval: + /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ + tfh_exception(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_invalid); + gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return -EFAULT; + +failactive: + /* Range invalidate active. Switch to UPM iff atomic */ + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_range_active); + gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", + tfh, vaddr); + return 1; +} + +/* + * Process an external interrupt from the GRU. This interrupt is + * caused by a TLB miss. + * Note that this is the interrupt handler that is registered with linux + * interrupt handlers. 
+ */ +static irqreturn_t gru_intr(int chiplet, int blade) +{ + struct gru_state *gru; + struct gru_tlb_fault_map imap, dmap; + struct gru_thread_state *gts; + struct gru_tlb_fault_handle *tfh = NULL; + struct completion *cmp; + int cbrnum, ctxnum; + + STAT(intr); + + gru = &gru_base[blade]->bs_grus[chiplet]; + if (!gru) { + dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", + raw_smp_processor_id(), chiplet); + return IRQ_NONE; + } + get_clear_fault_map(gru, &imap, &dmap); + gru_dbg(grudev, + "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", + smp_processor_id(), chiplet, gru->gs_gid, + imap.fault_bits[0], imap.fault_bits[1], + dmap.fault_bits[0], dmap.fault_bits[1]); + + for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { + STAT(intr_cbr); + cmp = gru->gs_blade->bs_async_wq; + if (cmp) + complete(cmp); + gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", + gru->gs_gid, cbrnum, cmp ? cmp->done : -1); + } + + for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { + STAT(intr_tfh); + tfh = get_tfh_by_index(gru, cbrnum); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + + /* + * When hardware sets a bit in the faultmap, it implicitly + * locks the GRU context so that it cannot be unloaded. + * The gts cannot change until a TFH start/writestart command + * is issued. + */ + ctxnum = tfh->ctxnum; + gts = gru->gs_gts[ctxnum]; + + /* Spurious interrupts can cause this. Ignore. */ + if (!gts) { + STAT(intr_spurious); + continue; + } + + /* + * This is running in interrupt context. Trylock the mmap_sem. + * If it fails, retry the fault in user context. 
+ */ + gts->ustats.fmm_tlbmiss++; + if (!gts->ts_force_cch_reload && + down_read_trylock(>s->ts_mm->mmap_sem)) { + gru_try_dropin(gru, gts, tfh, NULL); + up_read(>s->ts_mm->mmap_sem); + } else { + tfh_user_polling_mode(tfh); + STAT(intr_mm_lock_failed); + } + } + return IRQ_HANDLED; +} + +irqreturn_t gru0_intr(int irq, void *dev_id) +{ + return gru_intr(0, uv_numa_blade_id()); +} + +irqreturn_t gru1_intr(int irq, void *dev_id) +{ + return gru_intr(1, uv_numa_blade_id()); +} + +irqreturn_t gru_intr_mblade(int irq, void *dev_id) +{ + int blade; + + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_intr(0, blade); + gru_intr(1, blade); + } + return IRQ_HANDLED; +} + + +static int gru_user_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + void *cb) +{ + struct gru_mm_struct *gms = gts->ts_gms; + int ret; + + gts->ustats.upm_tlbmiss++; + while (1) { + wait_event(gms->ms_wait_queue, + atomic_read(&gms->ms_range_active) == 0); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); + if (ret <= 0) + return ret; + STAT(call_os_wait_queue); + } +} + +/* + * This interface is called as a result of a user detecting a "call OS" bit + * in a user CB. Normally means that a TLB fault has occurred. + * cb - user virtual address of the CB + */ +int gru_handle_user_call_os(unsigned long cb) +{ + struct gru_tlb_fault_handle *tfh; + struct gru_thread_state *gts; + void *cbk; + int ucbnum, cbrnum, ret = -EINVAL; + + STAT(call_os); + + /* sanity check the cb pointer */ + ucbnum = get_cb_number((void *)cb); + if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) + return -EINVAL; + + gts = gru_find_lock_gts(cb); + if (!gts) + return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? 
gts->ts_gru->gs_gid : -1, gts); + + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) + goto exit; + + gru_check_context_placement(gts); + + /* + * CCH may contain stale data if ts_force_cch_reload is set. + */ + if (gts->ts_gru && gts->ts_force_cch_reload) { + gts->ts_force_cch_reload = 0; + gru_update_cch(gts); + } + + ret = -EAGAIN; + cbrnum = thread_cbr_number(gts, ucbnum); + if (gts->ts_gru) { + tfh = get_tfh_by_index(gts->ts_gru, cbrnum); + cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, + gts->ts_ctxnum, ucbnum); + ret = gru_user_dropin(gts, tfh, cbk); + } +exit: + gru_unlock_gts(gts); + return ret; +} + +/* + * Fetch the exception detail information for a CB that terminated with + * an exception. + */ +int gru_get_exception_detail(unsigned long arg) +{ + struct control_block_extended_exc_detail excdet; + struct gru_control_block_extended *cbe; + struct gru_thread_state *gts; + int ucbnum, cbrnum, ret; + + STAT(user_exception); + if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) + return -EFAULT; + + gts = gru_find_lock_gts(excdet.cb); + if (!gts) + return -EINVAL; + + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? 
gts->ts_gru->gs_gid : -1, gts); + ucbnum = get_cb_number((void *)excdet.cb); + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { + ret = -EINVAL; + } else if (gts->ts_gru) { + cbrnum = thread_cbr_number(gts, ucbnum); + cbe = get_cbe_by_index(gts->ts_gru, cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); /* make sure we are have current data */ + excdet.opc = cbe->opccpy; + excdet.exopc = cbe->exopccpy; + excdet.ecause = cbe->ecause; + excdet.exceptdet0 = cbe->idef1upd; + excdet.exceptdet1 = cbe->idef3upd; + excdet.cbrstate = cbe->cbrstate; + excdet.cbrexecstatus = cbe->cbrexecstatus; + gru_flush_cache_cbe(cbe); + ret = 0; + } else { + ret = -EAGAIN; + } + gru_unlock_gts(gts); + + gru_dbg(grudev, + "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, " + "exdet0 0x%lx, exdet1 0x%x\n", + excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus, + excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); + if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) + ret = -EFAULT; + return ret; +} + +/* + * User request to unload a context. Content is saved for possible reload. 
+ */ +static int gru_unload_all_contexts(void) +{ + struct gru_thread_state *gts; + struct gru_state *gru; + int gid, ctxnum; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + foreach_gid(gid) { + gru = GID_TO_GRU(gid); + spin_lock(&gru->gs_lock); + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + gts = gru->gs_gts[ctxnum]; + if (gts && mutex_trylock(>s->ts_ctxlock)) { + spin_unlock(&gru->gs_lock); + gru_unload_context(gts, 1); + mutex_unlock(>s->ts_ctxlock); + spin_lock(&gru->gs_lock); + } + } + spin_unlock(&gru->gs_lock); + } + return 0; +} + +int gru_user_unload_context(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_unload_context_req req; + + STAT(user_unload_context); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); + + if (!req.gseg) + return gru_unload_all_contexts(); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + if (gts->ts_gru) + gru_unload_context(gts, 1); + gru_unlock_gts(gts); + + return 0; +} + +/* + * User request to flush a range of virtual addresses from the GRU TLB + * (Mainly for testing). + */ +int gru_user_flush_tlb(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_flush_tlb_req req; + struct gru_mm_struct *gms; + + STAT(user_flush_tlb); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg, + req.vaddr, req.len); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + gms = gts->ts_gms; + gru_unlock_gts(gts); + gru_flush_tlb_range(gms, req.vaddr, req.len); + + return 0; +} + +/* + * Fetch GSEG statisticss + */ +long gru_get_gseg_statistics(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_get_gseg_statistics_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* + * The library creates arrays of contexts for threaded programs. 
+ * If no gts exists in the array, the context has never been used & all + * statistics are implicitly 0. + */ + gts = gru_find_lock_gts(req.gseg); + if (gts) { + memcpy(&req.stats, >s->ustats, sizeof(gts->ustats)); + gru_unlock_gts(gts); + } else { + memset(&req.stats, 0, sizeof(gts->ustats)); + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + + return 0; +} + +/* + * Register the current task as the user of the GSEG slice. + * Needed for TLB fault interrupt targeting. + */ +int gru_set_context_option(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_set_context_option_req req; + int ret = 0; + + STAT(set_context_option); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) { + gts = gru_alloc_locked_gts(req.gseg); + if (IS_ERR(gts)) + return PTR_ERR(gts); + } + + switch (req.op) { + case sco_blade_chiplet: + /* Select blade/chiplet for GRU context */ + if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] || + req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) { + ret = -EINVAL; + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; + gru_check_context_placement(gts); + } + break; + case sco_gseg_owner: + /* Register the current task as the GSEG owner */ + gts->ts_tgid_owner = current->tgid; + break; + case sco_cch_req_slice: + /* Set the CCH slice option */ + gts->ts_cch_req_slice = req.val1 & 3; + break; + default: + ret = -EINVAL; + } + gru_unlock_gts(gts); + + return ret; +} diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c new file mode 100644 index 00000000000..cb3b4d22847 --- /dev/null +++ b/drivers/misc/sgi-gru/grufile.c @@ -0,0 +1,617 @@ +/* + * SN Platform GRU Driver + * + * FILE OPERATIONS & DRIVER INITIALIZATION + * + * This file supports the user system call for file 
open, close, mmap, etc. + * This also includes the driver initialization code. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/uaccess.h> +#ifdef CONFIG_X86_64 +#include <asm/uv/uv_irq.h> +#endif +#include <asm/uv/uv.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_mmrs.h> + +struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; +unsigned long gru_start_paddr __read_mostly; +void *gru_start_vaddr __read_mostly; +unsigned long gru_end_paddr __read_mostly; +unsigned int gru_max_gids __read_mostly; +struct gru_stats_s gru_stats; + +/* Guaranteed user available resources on each node */ +static int max_user_cbrs, max_user_dsr_bytes; + +static struct miscdevice gru_miscdev; + + +/* + * gru_vma_close + * + * Called when unmapping a device mapping. Frees all gru resources + * and tables belonging to the vma. 
+ */ +static void gru_vma_close(struct vm_area_struct *vma) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts; + struct list_head *entry, *next; + + if (!vma->vm_private_data) + return; + + vdata = vma->vm_private_data; + vma->vm_private_data = NULL; + gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file, + vdata); + list_for_each_safe(entry, next, &vdata->vd_head) { + gts = + list_entry(entry, struct gru_thread_state, ts_next); + list_del(>s->ts_next); + mutex_lock(>s->ts_ctxlock); + if (gts->ts_gru) + gru_unload_context(gts, 0); + mutex_unlock(>s->ts_ctxlock); + gts_drop(gts); + } + kfree(vdata); + STAT(vdata_free); +} + +/* + * gru_file_mmap + * + * Called when mmapping the device. Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. + */ +static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) + return -EPERM; + + if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + return -EINVAL; + + vma->vm_flags |= + (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | + VM_RESERVED); + vma->vm_page_prot = PAGE_SHARED; + vma->vm_ops = &gru_vm_ops; + + vma->vm_private_data = gru_alloc_vma_data(vma, 0); + if (!vma->vm_private_data) + return -ENOMEM; + + gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n", + file, vma->vm_start, vma, vma->vm_private_data); + return 0; +} + +/* + * Create a new GRU context + */ +static int gru_create_new_context(unsigned long arg) +{ + struct gru_create_context_req req; + struct vm_area_struct *vma; + struct gru_vma_data *vdata; + int ret = -EINVAL; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + if (req.data_segment_bytes > max_user_dsr_bytes) + return -EINVAL; + if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count) + return -EINVAL; + + if 
(!(req.options & GRU_OPT_MISS_MASK)) + req.options |= GRU_OPT_MISS_FMM_INTR; + + down_write(¤t->mm->mmap_sem); + vma = gru_find_vma(req.gseg); + if (vma) { + vdata = vma->vm_private_data; + vdata->vd_user_options = req.options; + vdata->vd_dsr_au_count = + GRU_DS_BYTES_TO_AU(req.data_segment_bytes); + vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + vdata->vd_tlb_preload_count = req.tlb_preload_count; + ret = 0; + } + up_write(¤t->mm->mmap_sem); + + return ret; +} + +/* + * Get GRU configuration info (temp - for emulator testing) + */ +static long gru_get_config_info(unsigned long arg) +{ + struct gru_config_info info; + int nodesperblade; + + if (num_online_nodes() > 1 && + (uv_node_to_blade_id(1) == uv_node_to_blade_id(0))) + nodesperblade = 2; + else + nodesperblade = 1; + info.cpus = num_online_cpus(); + info.nodes = num_online_nodes(); + info.blades = info.nodes / nodesperblade; + info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * gru_file_unlocked_ioctl + * + * Called to update file attributes via IOCTL calls. 
+ */ +static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, + unsigned long arg) +{ + int err = -EBADRQC; + + gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg); + + switch (req) { + case GRU_CREATE_CONTEXT: + err = gru_create_new_context(arg); + break; + case GRU_SET_CONTEXT_OPTION: + err = gru_set_context_option(arg); + break; + case GRU_USER_GET_EXCEPTION_DETAIL: + err = gru_get_exception_detail(arg); + break; + case GRU_USER_UNLOAD_CONTEXT: + err = gru_user_unload_context(arg); + break; + case GRU_USER_FLUSH_TLB: + err = gru_user_flush_tlb(arg); + break; + case GRU_USER_CALL_OS: + err = gru_handle_user_call_os(arg); + break; + case GRU_GET_GSEG_STATISTICS: + err = gru_get_gseg_statistics(arg); + break; + case GRU_KTEST: + err = gru_ktest(arg); + break; + case GRU_GET_CONFIG_INFO: + err = gru_get_config_info(arg); + break; + case GRU_DUMP_CHIPLET_STATE: + err = gru_dump_chiplet_request(arg); + break; + } + return err; +} + +/* + * Called at init time to build tables for all GRUs that are present in the + * system. + */ +static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, + void *vaddr, int blade_id, int chiplet_id) +{ + spin_lock_init(&gru->gs_lock); + spin_lock_init(&gru->gs_asid_lock); + gru->gs_gru_base_paddr = paddr; + gru->gs_gru_base_vaddr = vaddr; + gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id; + gru->gs_blade = gru_base[blade_id]; + gru->gs_blade_id = blade_id; + gru->gs_chiplet_id = chiplet_id; + gru->gs_cbr_map = (GRU_CBR_AU == 64) ? 
~0 : (1UL << GRU_CBR_AU) - 1; + gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru->gs_asid_limit = MAX_ASID; + gru_tgh_flush_init(gru); + if (gru->gs_gid >= gru_max_gids) + gru_max_gids = gru->gs_gid + 1; + gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n", + blade_id, gru->gs_gid, gru->gs_gru_base_vaddr, + gru->gs_gru_base_paddr); +} + +static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) +{ + int pnode, nid, bid, chip; + int cbrs, dsrbytes, n; + int order = get_order(sizeof(struct gru_blade_state)); + struct page *page; + struct gru_state *gru; + unsigned long paddr; + void *vaddr; + + max_user_cbrs = GRU_NUM_CB; + max_user_dsr_bytes = GRU_NUM_DSR_BYTES; + for_each_possible_blade(bid) { + pnode = uv_blade_to_pnode(bid); + nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ + page = alloc_pages_node(nid, GFP_KERNEL, order); + if (!page) + goto fail; + gru_base[bid] = page_address(page); + memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); + gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; + spin_lock_init(&gru_base[bid]->bs_lock); + init_rwsem(&gru_base[bid]->bs_kgts_sema); + + dsrbytes = 0; + cbrs = 0; + for (gru = gru_base[bid]->bs_grus, chip = 0; + chip < GRU_CHIPLETS_PER_BLADE; + chip++, gru++) { + paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); + vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, chip); + n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + cbrs = max(cbrs, n); + n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + dsrbytes = max(dsrbytes, n); + } + max_user_cbrs = min(max_user_cbrs, cbrs); + max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes); + } + + return 0; + +fail: + for (bid--; bid >= 0; bid--) + free_pages((unsigned long)gru_base[bid], order); + return -ENOMEM; +} + +static void gru_free_tables(void) +{ + int bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + + for (bid = 0; bid < 
GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); +} + +static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) +{ + unsigned long mmr = 0; + int core; + + /* + * We target the cores of a blade and not the hyperthreads themselves. + * There is a max of 8 cores per socket and 2 sockets per blade, + * making for a max total of 16 cores (i.e., 16 CPUs without + * hyperthreading and 32 CPUs with hyperthreading). + */ + core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) + return 0; + + if (chiplet == 0) { + mmr = UVH_GR0_TLB_INT0_CONFIG + + core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); + } else if (chiplet == 1) { + mmr = UVH_GR1_TLB_INT0_CONFIG + + core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); + } else { + BUG(); + } + + *corep = core; + return mmr; +} + +#ifdef CONFIG_IA64 + +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(unsigned int irq) +{ +} + +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... 
GRU_CHIPLETS_PER_BLADE - 1] { + .mask = gru_noop, + .unmask = gru_noop, + .ack = gru_noop + } +}; + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = set_irq_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq, core; + int ret; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); + if (irq < 0) { + printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -irq); + return irq; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + uv_teardown_irq(irq); + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + int irq, 
core; + unsigned long mmr; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr) { + irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; + if (irq) { + free_irq(irq, NULL); + uv_teardown_irq(irq); + } + } +} + +#endif + +static void gru_teardown_tlb_irqs(void) +{ + int blade; + int cpu; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + gru_chiplet_teardown_tlb_irq(0, cpu, blade); + gru_chiplet_teardown_tlb_irq(1, cpu, blade); + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_chiplet_teardown_tlb_irq(0, 0, blade); + gru_chiplet_teardown_tlb_irq(1, 0, blade); + } +} + +static int gru_setup_tlb_irqs(void) +{ + int blade; + int cpu; + int ret; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); + if (ret != 0) + goto exit1; + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + } + + return 0; + +exit1: + gru_teardown_tlb_irqs(); + return ret; +} + +/* + * gru_init + * + * Called at boot or module load time to initialize the GRUs. 
 *
 * Does nothing (and returns 0) on non-UV systems. Otherwise registers the
 * misc device, the proc entries, the per-blade tables and the TLB irqs,
 * undoing the completed steps in reverse order if a later one fails.
 */
static int __init gru_init(void)
{
	int ret;

	if (!is_uv_system())
		return 0;

#if defined CONFIG_IA64
	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
#else
	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
				0x7fffffffffffUL;
#endif
	gru_start_vaddr = __va(gru_start_paddr);
	gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
	       gru_start_paddr, gru_end_paddr);
	ret = misc_register(&gru_miscdev);
	if (ret) {
		printk(KERN_ERR "%s: misc_register failed\n",
		       GRU_DRIVER_ID_STR);
		goto exit0;
	}

	ret = gru_proc_init();
	if (ret) {
		printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
		goto exit1;
	}

	ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
	if (ret) {
		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
		goto exit2;
	}

	ret = gru_setup_tlb_irqs();
	if (ret != 0)
		goto exit3;

	gru_kservices_init();

	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
	       GRU_DRIVER_VERSION_STR);
	return 0;

	/* Error unwinding: each label undoes the step before the failure. */
exit3:
	gru_free_tables();
exit2:
	gru_proc_exit();
exit1:
	misc_deregister(&gru_miscdev);
exit0:
	return ret;

}

/* Module unload: release what gru_init() set up. No-op on non-UV systems. */
static void __exit gru_exit(void)
{
	if (!is_uv_system())
		return;

	gru_teardown_tlb_irqs();
	gru_kservices_exit();
	gru_free_tables();
	misc_deregister(&gru_miscdev);
	gru_proc_exit();
}

/* File operations of the "gru" misc device: ioctl and mmap only. */
static const struct file_operations gru_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= gru_file_unlocked_ioctl,
	.mmap		= gru_file_mmap,
};

static struct miscdevice gru_miscdev = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "gru",
	.fops		= &gru_fops,
};

/* VM operations installed on GRU mappings by gru_file_mmap(). */
const struct vm_operations_struct gru_vm_ops = {
	.close		= gru_vma_close,
	.fault		= gru_fault,
};

/* Built-in: initialize at fs_initcall level; as a module: at load time. */
#ifndef MODULE
fs_initcall(gru_init);
#else
module_init(gru_init);
#endif
module_exit(gru_exit);

module_param(gru_options, ulong, 0644);
MODULE_PARM_DESC(gru_options, "Various debug options");

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
MODULE_VERSION(GRU_DRIVER_VERSION_STR);

diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
new file mode 100644
index 00000000000..2f30badc6ff
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -0,0 +1,216 @@
/*
 *              GRU KERNEL MCS INSTRUCTIONS
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"

/* 10 sec */
#ifdef CONFIG_IA64
#include <asm/processor.h>
#define GRU_OPERATION_TIMEOUT	(((cycles_t) local_cpu_data->itc_freq)*10)
#define CLKS2NSEC(c)		((c) *1000000000 / local_cpu_data->itc_freq)
#else
#include <asm/tsc.h>
#define GRU_OPERATION_TIMEOUT	((cycles_t) tsc_khz*10*1000)
#define CLKS2NSEC(c)		((c) * 1000000 / tsc_khz)
#endif

/* Extract the status field from a kernel handle */
#define GET_MSEG_HANDLE_STATUS(h)	(((*(unsigned long *)(h)) >> 16) & 3)

struct mcs_op_statistic mcs_op_statistics[mcsop_last];

/*
 * Account one completed MCS operation: bump its count, add the elapsed
 * time (converted from clocks to nsec) to its total and track the maximum.
 */
static void update_mcs_stats(enum mcs_op op, unsigned long clks)
{
	unsigned long nsec;

	nsec = CLKS2NSEC(clks);
	atomic_long_inc(&mcs_op_statistics[op].count);
	atomic_long_add(nsec, &mcs_op_statistics[op].total);
	if (mcs_op_statistics[op].max < nsec)
		mcs_op_statistics[op].max = nsec;
}

/*
 * Start the instruction that has been set up in handle "h" by setting
 * bits in its first word, then flush the handle from the cache.
 */
static void start_instruction(void *h)
{
	unsigned long *w0 = h;

	wmb();		/* setting CMD/STATUS bits must be last */
	*w0 = *w0 | 0x20001;
	gru_flush_cache(h);
}

/* Panic with the handle address and type (CCH, TGH, TFH or "???"). */
static void report_instruction_timeout(void *h)
{
	unsigned long goff = GSEGPOFF((unsigned long)h);
	char *id = "???";

	if (TYPE_IS(CCH, goff))
		id = "CCH";
	else if (TYPE_IS(TGH, goff))
		id = "TGH";
	else if (TYPE_IS(TFH, goff))
		id = "TFH";

	panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id);
}

/*
 * Spin until the handle's status field is no longer CCHSTATUS_ACTIVE and
 * return that status. A timeout is reported once GRU_OPERATION_TIMEOUT
 * cycles have elapsed. Statistics are updated when OPT_STATS is set.
 */
static int wait_instruction_complete(void *h, enum mcs_op opc)
{
	int status;
	unsigned long start_time = get_cycles();

	while (1) {
		cpu_relax();
		status = GET_MSEG_HANDLE_STATUS(h);
		if (status != CCHSTATUS_ACTIVE)
			break;
		if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) {
			report_instruction_timeout(h);
			start_time = get_cycles();
		}
	}
	if (gru_options & OPT_STATS)
		update_mcs_stats(opc, get_cycles() - start_time);
	return status;
}

/* Issue CCHOP_ALLOCATE on the CCH and return the completion status. */
int cch_allocate(struct gru_context_configuration_handle *cch)
{
	int ret;

	cch->opc = CCHOP_ALLOCATE;
	start_instruction(cch);
	ret = wait_instruction_complete(cch, cchop_allocate);

	/*
	 * Stop speculation into the GSEG being mapped by the previous ALLOCATE.
	 * The GSEG memory does not exist until the ALLOCATE completes.
	 */
	sync_core();
	return ret;
}

/* Issue CCHOP_START on the CCH and return the completion status. */
int cch_start(struct gru_context_configuration_handle *cch)
{
	cch->opc = CCHOP_START;
	start_instruction(cch);
	return wait_instruction_complete(cch, cchop_start);
}

/* Issue CCHOP_INTERRUPT on the CCH and return the completion status. */
int cch_interrupt(struct gru_context_configuration_handle *cch)
{
	cch->opc = CCHOP_INTERRUPT;
	start_instruction(cch);
	return wait_instruction_complete(cch, cchop_interrupt);
}

/* Issue CCHOP_DEALLOCATE on the CCH and return the completion status. */
int cch_deallocate(struct gru_context_configuration_handle *cch)
{
	int ret;

	cch->opc = CCHOP_DEALLOCATE;
	start_instruction(cch);
	ret = wait_instruction_complete(cch, cchop_deallocate);

	/*
	 * Stop speculation into the GSEG being unmapped by the previous
	 * DEALLOCATE.
	 */
	sync_core();
	return ret;
}

/* Issue CCHOP_INTERRUPT_SYNC on the CCH and return the completion status. */
int cch_interrupt_sync(struct gru_context_configuration_handle
				     *cch)
{
	cch->opc = CCHOP_INTERRUPT_SYNC;
	start_instruction(cch);
	return wait_instruction_complete(cch, cchop_interrupt_sync);
}

/*
 * Fill in the TGH with the invalidate parameters, issue TGHOP_TLBINV and
 * return the completion status.
 */
int tgh_invalidate(struct gru_tlb_global_handle *tgh,
				 unsigned long vaddr, unsigned long vaddrmask,
				 int asid, int pagesize, int global, int n,
				 unsigned short ctxbitmap)
{
	tgh->vaddr = vaddr;
	tgh->asid = asid;
	tgh->pagesize = pagesize;
	tgh->n = n;
	tgh->global = global;
	tgh->vaddrmask = vaddrmask;
	tgh->ctxbitmap = ctxbitmap;
	tgh->opc = TGHOP_TLBINV;
	start_instruction(tgh);
	return wait_instruction_complete(tgh, tghop_invalidate);
}

/*
 * Fill in the TFH with a translation, issue TFHOP_WRITE_ONLY and return
 * the completion status.
 */
int tfh_write_only(struct gru_tlb_fault_handle *tfh,
				  unsigned long paddr, int gaa,
				  unsigned long vaddr, int asid, int dirty,
				  int pagesize)
{
	tfh->fillasid = asid;
	tfh->fillvaddr = vaddr;
	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
	tfh->gaa = gaa;
	tfh->dirty = dirty;
	tfh->pagesize = pagesize;
	tfh->opc = TFHOP_WRITE_ONLY;
	start_instruction(tfh);
	return wait_instruction_complete(tfh, tfhop_write_only);
}

void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
				     unsigned long paddr, int gaa,
				     unsigned long vaddr, int asid, int dirty,
				     int pagesize)
{
	tfh->fillasid = asid;
tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + +void tfh_restart(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_RESTART; + start_instruction(tfh); +} + +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} + diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h new file mode 100644 index 00000000000..3f998b924d8 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -0,0 +1,531 @@ +/* + * SN Platform GRU Driver + * + * GRU HANDLE DEFINITION + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUHANDLES_H__ +#define __GRUHANDLES_H__ +#include "gru_instructions.h" + +/* + * Manifest constants for GRU Memory Map + */ +#define GRU_GSEG0_BASE 0 +#define GRU_MCS_BASE (64 * 1024 * 1024) +#define GRU_SIZE (128UL * 1024 * 1024) + +/* Handle & resource counts */ +#define GRU_NUM_CB 128 +#define GRU_NUM_DSR_BYTES (32 * 1024) +#define GRU_NUM_TFM 16 +#define GRU_NUM_TGH 24 +#define GRU_NUM_CBE 128 +#define GRU_NUM_TFH 128 +#define GRU_NUM_CCH 16 + +/* Maximum resource counts that can be reserved by user programs */ +#define GRU_NUM_USER_CBR GRU_NUM_CBE +#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES + +/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles + * are the same */ +#define GRU_HANDLE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 + +/* Base addresses of handles */ +#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) +#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) +#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) +#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) +#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) + +/* User gseg constants */ +#define GRU_GSEG_STRIDE (4 * 1024 * 1024) +#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) + +/* Data segment constants */ +#define GRU_DSR_AU_BYTES 1024 +#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) + +/* Control block constants */ +#define GRU_CBR_AU_SIZE 2 +#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) + +/* Convert resource counts to the number of AU */ +#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES) +#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE) + +/* UV limits */ +#define GRU_CHIPLETS_PER_HUB 2 
+#define GRU_HUBS_PER_BLADE 1 +#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB) + +/* User GRU Gseg offsets */ +#define GRU_CB_BASE 0 +#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) +#define GRU_DS_BASE 0x20000 +#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) + +/* Convert a GRU physical address to the chiplet offset */ +#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) + +/* Convert an arbitrary handle address to the beginning of the GRU segment */ +#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) + +/* Test a valid handle address to determine the type */ +#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ + GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ + (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + + +/* General addressing macros. */ +static inline void *get_gseg_base_address(void *base, int ctxnum) +{ + return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum); +} + +static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + + GRU_CB_BASE + GRU_HANDLE_STRIDE * line); +} + +static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE + + GRU_CACHE_LINE_BYTES * line); +} + +static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum) +{ + return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum) +{ + return (struct gru_control_block_extended *)(base + GRU_CBE_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum) +{ + return (struct 
gru_tlb_fault_handle *)(base + GRU_TFH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_context_configuration_handle *get_cch(void *base, + int ctxnum) +{ + return (struct gru_context_configuration_handle *)(base + + GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline unsigned long get_cb_number(void *cb) +{ + return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / + GRU_HANDLE_STRIDE; +} + +/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/ +static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode, + int chiplet) +{ + return paddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) +{ + return vaddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline struct gru_control_block_extended *gru_tfh_to_cbe( + struct gru_tlb_fault_handle *tfh) +{ + unsigned long cbe; + + cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; + return (struct gru_control_block_extended*)cbe; +} + + + + +/* + * Global TLB Fault Map + * Bitmap of outstanding TLB misses needing interrupt/polling service. + * + */ +struct gru_tlb_fault_map { + unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill0[2]; + unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill1[2]; +}; + +/* + * TGH - TLB Global Handle + * Used for TLB flushing. 
+ * + * The vaddr, asid, pagesize, n, global, vaddrmask, ctxbitmap and opc fields + * are written by tgh_invalidate() before the instruction is started. + * + */ +struct gru_tlb_global_handle { + unsigned int cmd:1; /* DW 0 */ + unsigned int delresp:1; + unsigned int opc:1; + unsigned int fill1:5; + + unsigned int fill2:8; + + unsigned int status:2; + unsigned long fill3:2; + unsigned int state:3; + unsigned long fill4:1; + + unsigned int cause:3; + unsigned long fill5:37; + + unsigned long vaddr:64; /* DW 1 */ + + unsigned int asid:24; /* DW 2 */ + unsigned int fill6:8; + + unsigned int pagesize:5; + unsigned int fill7:11; + + unsigned int global:1; + unsigned int fill8:15; + + unsigned long vaddrmask:39; /* DW 3 */ + unsigned int fill9:9; + unsigned int n:10; + unsigned int fill10:6; + + unsigned int ctxbitmap:16; /* DW4 */ + unsigned long fill11[3]; +}; + +enum gru_tgh_cmd { + TGHCMD_START +}; + +/* Values for the TGH opc field */ +enum gru_tgh_opc { + TGHOP_TLBNOP, + TGHOP_TLBINV +}; + +/* Values for the TGH status field */ +enum gru_tgh_status { + TGHSTATUS_IDLE, + TGHSTATUS_EXCEPTION, + TGHSTATUS_ACTIVE +}; + +/* Values for the TGH state field */ +enum gru_tgh_state { + TGHSTATE_IDLE, + TGHSTATE_PE_INVAL, + TGHSTATE_INTERRUPT_INVAL, + TGHSTATE_WAITDONE, + TGHSTATE_RESTART_CTX, +}; + +/* Values for the TGH cause field */ +enum gru_tgh_cause { + TGHCAUSE_RR_ECC, + TGHCAUSE_TLB_ECC, + TGHCAUSE_LRU_ECC, + TGHCAUSE_PS_ECC, + TGHCAUSE_MUL_ERR, + TGHCAUSE_DATA_ERR, + TGHCAUSE_SW_FORCE +}; + + +/* + * TFH - TLB Fault Handle + * Used for TLB dropins into the GRU TLB. 
+ * + */ +struct gru_tlb_fault_handle { + unsigned int cmd:1; /* DW 0 - low 32*/ + unsigned int delresp:1; + unsigned int fill0:2; + unsigned int opc:3; + unsigned int fill1:9; + + unsigned int status:2; + unsigned int fill2:2; + unsigned int state:3; + unsigned int fill3:1; + + unsigned int cause:6; + unsigned int cb_int:1; + unsigned int fill4:1; + + unsigned int indexway:12; /* DW 0 - high 32 */ + unsigned int fill5:4; + + unsigned int ctxnum:4; + unsigned int fill6:12; + + unsigned long missvaddr:64; /* DW 1 */ + + unsigned int missasid:24; /* DW 2 */ + unsigned int fill7:8; + unsigned int fillasid:24; + unsigned int dirty:1; + unsigned int gaa:2; + unsigned long fill8:5; + + unsigned long pfn:41; /* DW 3 */ + unsigned int fill9:7; + unsigned int pagesize:5; + unsigned int fill10:11; + + unsigned long fillvaddr:64; /* DW 4 */ + + unsigned long fill11[3]; +}; + +enum gru_tfh_opc { + TFHOP_NOOP, + TFHOP_RESTART, + TFHOP_WRITE_ONLY, + TFHOP_WRITE_RESTART, + TFHOP_EXCEPTION, + TFHOP_USER_POLLING_MODE = 7, +}; + +enum tfh_status { + TFHSTATUS_IDLE, + TFHSTATUS_EXCEPTION, + TFHSTATUS_ACTIVE, +}; + +enum tfh_state { + TFHSTATE_INACTIVE, + TFHSTATE_IDLE, + TFHSTATE_MISS_UPM, + TFHSTATE_MISS_FMM, + TFHSTATE_HW_ERR, + TFHSTATE_WRITE_TLB, + TFHSTATE_RESTART_CBR, +}; + +/* TFH cause bits */ +enum tfh_cause { + TFHCAUSE_NONE, + TFHCAUSE_TLB_MISS, + TFHCAUSE_TLB_MOD, + TFHCAUSE_HW_ERROR_RR, + TFHCAUSE_HW_ERROR_MAIN_ARRAY, + TFHCAUSE_HW_ERROR_VALID, + TFHCAUSE_HW_ERROR_PAGESIZE, + TFHCAUSE_INSTRUCTION_EXCEPTION, + TFHCAUSE_UNCORRECTIBLE_ERROR, +}; + +/* GAA values */ +#define GAA_RAM 0x0 +#define GAA_NCRAM 0x2 +#define GAA_MMIO 0x1 +#define GAA_REGISTER 0x3 + +/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */ +#define GRU_PADDR_SHIFT 12 + +/* + * Context Configuration handle + * Used to allocate resources to a GSEG context. 
+ * + */ +struct gru_context_configuration_handle { + unsigned int cmd:1; /* DW0 */ + unsigned int delresp:1; + unsigned int opc:3; + unsigned int unmap_enable:1; + unsigned int req_slice_set_enable:1; + unsigned int req_slice:2; + unsigned int cb_int_enable:1; + unsigned int tlb_int_enable:1; + unsigned int tfm_fault_bit_enable:1; + unsigned int tlb_int_select:4; + + unsigned int status:2; + unsigned int state:2; + unsigned int reserved2:4; + + unsigned int cause:4; + unsigned int tfm_done_bit_enable:1; + unsigned int unused:3; + + unsigned int dsr_allocation_map; + + unsigned long cbr_allocation_map; /* DW1 */ + + unsigned int asid[8]; /* DW 2 - 5 */ + unsigned short sizeavail[8]; /* DW 6 - 7 */ +} __attribute__ ((packed)); + +enum gru_cch_opc { + CCHOP_START = 1, + CCHOP_ALLOCATE, + CCHOP_INTERRUPT, + CCHOP_DEALLOCATE, + CCHOP_INTERRUPT_SYNC, +}; + +enum gru_cch_status { + CCHSTATUS_IDLE, + CCHSTATUS_EXCEPTION, + CCHSTATUS_ACTIVE, +}; + +enum gru_cch_state { + CCHSTATE_INACTIVE, + CCHSTATE_MAPPED, + CCHSTATE_ACTIVE, + CCHSTATE_INTERRUPTED, +}; + +/* CCH Exception cause */ +enum gru_cch_cause { + CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, + CCHCAUSE_ILLEGAL_OPCODE = 2, + CCHCAUSE_INVALID_START_REQUEST = 3, + CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, + CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, + CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, + CCHCAUSE_CCH_BUSY = 7, + CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, + CCHCAUSE_BAD_TFM_CONFIG = 9, + CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, + CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, + CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, +}; +/* + * CBE - Control Block Extended + * Maintains internal GRU state for active CBs. 
+ * + */ +struct gru_control_block_extended { + unsigned int reserved0:1; /* DW 0 - low */ + unsigned int imacpy:3; + unsigned int reserved1:4; + unsigned int xtypecpy:3; + unsigned int iaa0cpy:2; + unsigned int iaa1cpy:2; + unsigned int reserved2:1; + unsigned int opccpy:8; + unsigned int exopccpy:8; + + unsigned int idef2cpy:22; /* DW 0 - high */ + unsigned int reserved3:10; + + unsigned int idef4cpy:22; /* DW 1 */ + unsigned int reserved4:10; + unsigned int idef4upd:22; + unsigned int reserved5:10; + + unsigned long idef1upd:64; /* DW 2 */ + + unsigned long idef5cpy:64; /* DW 3 */ + + unsigned long idef6cpy:64; /* DW 4 */ + + unsigned long idef3upd:64; /* DW 5 */ + + unsigned long idef5upd:64; /* DW 6 */ + + unsigned int idef2upd:22; /* DW 7 */ + unsigned int reserved6:10; + + unsigned int ecause:20; + unsigned int cbrstate:4; + unsigned int cbrexecstatus:8; +}; + +/* CBE fields for active BCOPY instructions */ +#define cbe_baddr0 idef1upd +#define cbe_baddr1 idef3upd +#define cbe_src_cl idef6cpy +#define cbe_nelemcur idef5upd + +enum gru_cbr_state { + CBRSTATE_INACTIVE, + CBRSTATE_IDLE, + CBRSTATE_PE_CHECK, + CBRSTATE_QUEUED, + CBRSTATE_WAIT_RESPONSE, + CBRSTATE_INTERRUPTED, + CBRSTATE_INTERRUPTED_MISS_FMM, + CBRSTATE_BUSY_INTERRUPT_MISS_FMM, + CBRSTATE_INTERRUPTED_MISS_UPM, + CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, + CBRSTATE_REQUEST_ISSUE, + CBRSTATE_BUSY_INTERRUPT, +}; + +/* CBE cbrexecstatus bits - defined in gru_instructions.h*/ +/* CBE ecause bits - defined in gru_instructions.h */ + +/* + * Convert a processor pagesize into the strange encoded pagesize used by the + * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) + * pagesize log pagesize grupagesize + * 4k 12 0 + * 16k 14 1 + * 64k 16 2 + * 256k 18 3 + * 1m 20 4 + * 2m 21 5 + * 4m 22 6 + * 16m 24 7 + * 64m 26 8 + * ... + */ +#define GRU_PAGESIZE(sh) ((((sh) > 20 ? 
(sh) + 2 : (sh)) >> 1) - 6) +#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) + +/* minimum TLB purge count to ensure a full purge */ +#define GRUMAXINVAL 1024UL + +int cch_allocate(struct gru_context_configuration_handle *cch); +int cch_start(struct gru_context_configuration_handle *cch); +int cch_interrupt(struct gru_context_configuration_handle *cch); +int cch_deallocate(struct gru_context_configuration_handle *cch); +int cch_interrupt_sync(struct gru_context_configuration_handle *cch); +int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, + unsigned long vaddrmask, int asid, int pagesize, int global, int n, + unsigned short ctxbitmap); +int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_restart(struct gru_tlb_fault_handle *tfh); +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); +void tfh_exception(struct gru_tlb_fault_handle *tfh); + +#endif /* __GRUHANDLES_H__ */ diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c new file mode 100644 index 00000000000..9b2062d1732 --- /dev/null +++ b/drivers/misc/sgi-gru/grukdump.c @@ -0,0 +1,235 @@ +/* + * SN Platform GRU Driver + * + * Dump GRU State + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#include "grulib.h" + +#define CCH_LOCK_ATTEMPTS 10 + +static int gru_user_copy_handle(void __user **dp, void *s) +{ + if (copy_to_user(*dp, s, GRU_HANDLE_BYTES)) + return -1; + *dp += GRU_HANDLE_BYTES; + return 0; +} + +static int gru_dump_context_data(void *grubase, + struct gru_context_configuration_handle *cch, + void __user *ubuf, int ctxnum, int dsrcnt, + int flush_cbrs) +{ + void *cb, *cbe, *tfh, *gseg; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + tfh = grubase + GRU_TFH_BASE; + + for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (flush_cbrs) + gru_flush_cache(cb); + if (gru_user_copy_handle(&ubuf, cb)) + goto fail; + if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) + goto fail; + if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE)) + goto fail; + cb += GRU_HANDLE_STRIDE; + } + if (dsrcnt) + memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE); + return 0; + +fail: + return -EFAULT; +} + +static int gru_dump_tfm(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_fault_map *tfm; + int i, ret, bytes; + + bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TFM; i++) { + tfm = get_tfm(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tfm)) + goto fail; + } + return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; 
+ +fail: + return -EFAULT; +} + +static int gru_dump_tgh(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_global_handle *tgh; + int i, ret, bytes; + + bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TGH; i++) { + tgh = get_tgh(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tgh)) + goto fail; + } + return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_context(struct gru_state *gru, int ctxnum, + void __user *ubuf, void __user *ubufend, char data_opt, + char lock_cch, char flush_cbrs) +{ + struct gru_dump_context_header hdr; + struct gru_dump_context_header __user *uhdr = ubuf; + struct gru_context_configuration_handle *cch, *ubufcch; + struct gru_thread_state *gts; + int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0; + void *grubase; + + memset(&hdr, 0, sizeof(hdr)); + grubase = gru->gs_gru_base_vaddr; + cch = get_cch(grubase, ctxnum); + for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) { + cch_locked = trylock_cch_handle(cch); + if (cch_locked) + break; + msleep(1); + } + + ubuf += sizeof(hdr); + ubufcch = ubuf; + if (gru_user_copy_handle(&ubuf, cch)) + goto fail; + if (cch_locked) + ubufcch->delresp = 0; + bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES; + + if (cch_locked || !lock_cch) { + gts = gru->gs_gts[ctxnum]; + if (gts && gts->ts_vma) { + hdr.pid = gts->ts_tgid_owner; + hdr.vaddr = gts->ts_vma->vm_start; + } + if (cch->state != CCHSTATE_INACTIVE) { + cbrcnt = hweight64(cch->cbr_allocation_map) * + GRU_CBR_AU_SIZE; + dsrcnt = data_opt ? 
hweight32(cch->dsr_allocation_map) * + GRU_DSR_AU_CL : 0; + } + bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + else + ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, + dsrcnt, flush_cbrs); + } + if (cch_locked) + unlock_cch_handle(cch); + if (ret) + return ret; + + hdr.magic = GRU_DUMP_MAGIC; + hdr.gid = gru->gs_gid; + hdr.ctxnum = ctxnum; + hdr.cbrcnt = cbrcnt; + hdr.dsrcnt = dsrcnt; + hdr.cch_locked = cch_locked; + if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr))) + ret = -EFAULT; + + return ret ? ret : bytes; + +fail: + unlock_cch_handle(cch); + return -EFAULT; +} + +int gru_dump_chiplet_request(unsigned long arg) +{ + struct gru_state *gru; + struct gru_dump_chiplet_state_req req; + void __user *ubuf; + void __user *ubufend; + int ctxnum, ret, cnt = 0; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* Currently, only dump by gid is implemented */ + if (req.gid >= gru_max_gids || req.gid < 0) + return -EINVAL; + + gru = GID_TO_GRU(req.gid); + ubuf = req.buf; + ubufend = req.buf + req.buflen; + + ret = gru_dump_tfm(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + ret = gru_dump_tgh(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (req.ctxnum == ctxnum || req.ctxnum < 0) { + ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, + req.data_opt, req.lock_cch, + req.flush_cbrs); + if (ret < 0) + goto fail; + ubuf += ret; + cnt++; + } + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + return cnt; + +fail: + return ret; +} diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c new file mode 100644 index 00000000000..34749ee88df --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.c @@ -0,0 +1,1161 @@ +/* + * SN Platform GRU Driver + * + * KERNEL SERVICES THAT USE THE GRU + * + * Copyright (c) 2008 
Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/proc_fs.h> +#include <linux/interrupt.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <asm/io_apic.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#include "grukservices.h" +#include "gru_instructions.h" +#include <asm/uv/uv_hub.h> + +/* + * Kernel GRU Usage + * + * The following is an interim algorithm for management of kernel GRU + * resources. This will likely be replaced when we better understand the + * kernel/user requirements. + * + * Blade percpu resources are reserved for kernel use. These resources are + * reserved whenever the kernel context for the blade is loaded. Note + * that the kernel context is not guaranteed to be always available. It is + * loaded on demand & can be stolen by a user if the user demand exceeds the + * kernel demand. The kernel can always reload the kernel context but + * a SLEEP may be required!!!. + * + * Async Overview: + * + * Each blade has one "kernel context" that owns GRU kernel resources + * located on the blade. 
Kernel drivers use GRU resources in this context + * for sending messages, zeroing memory, etc. + * + * The kernel context is dynamically loaded on demand. If it is not in + * use by the kernel, the kernel context can be unloaded & given to a user. + * The kernel context will be reloaded when needed. This may require that + * a context be stolen from a user. + * NOTE: frequent unloading/reloading of the kernel context is + * expensive. We are depending on batch schedulers, cpusets, sane + * drivers or some other mechanism to prevent the need for frequent + * stealing/reloading. + * + * The kernel context consists of two parts: + * - 1 CB & a few DSRs that are reserved for each cpu on the blade. + * Each cpu has its own private resources & does not share them + * with other cpus. These resources are used serially, i.e., + * locked, used & unlocked on each call to a function in + * grukservices. + * (Now that we have dynamic loading of kernel contexts, I + * may rethink this & allow sharing between cpus....) + * + * - Additional resources can be reserved long term & used directly + * by UV drivers located in the kernel. Drivers using these GRU + * resources can use asynchronous GRU instructions that send + * interrupts on completion. + * - these resources must be explicitly locked/unlocked + * - locked resources prevent (obviously) the kernel + * context from being unloaded. + * - drivers using these resources directly issue their own + * GRU instructions and must wait/check completion. + * + * When these resources are reserved, the caller can optionally + * associate a completion with the resources and use asynchronous + * GRU instructions. When an async GRU instruction completes, the + * driver will do a wakeup on the event. 
+ * + */ + + +#define ASYNC_HAN_TO_BID(h) ((h) - 1) +#define ASYNC_BID_TO_HAN(b) ((b) + 1) +#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] + +#define GRU_NUM_KERNEL_CBR 1 +#define GRU_NUM_KERNEL_DSR_BYTES 256 +#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ + GRU_CACHE_LINE_BYTES) + +/* GRU instruction attributes for all instructions */ +#define IMA IMA_CB_DELAY + +/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ +#define __gru_cacheline_aligned__ \ + __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) + +#define MAGIC 0x1234567887654321UL + +/* Default retry count for GRU errors on kernel instructions */ +#define EXCEPTION_RETRY_LIMIT 3 + +/* Status of message queue sections */ +#define MQS_EMPTY 0 +#define MQS_FULL 1 +#define MQS_NOOP 2 + +/*----------------- RESOURCE MANAGEMENT -------------------------------------*/ +/* optimized for x86_64 */ +struct message_queue { + union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ + int qlines; /* DW 1 */ + long hstatus[2]; + void *next __gru_cacheline_aligned__;/* CL 1 */ + void *limit; + void *start; + void *start2; + char data ____cacheline_aligned; /* CL 2 */ +}; + +/* First word in every message - used by mesq interface */ +struct message_header { + char present; + char present2; + char lines; + char fill; +}; + +#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) + +/* + * Reload the blade's kernel context into a GRU chiplet. Called holding + * the bs_kgts_sema for READ. Will steal user contexts if necessary. 
+ * On return the bs_kgts_sema is again held for READ. + */ +static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) +{ + struct gru_state *gru; + struct gru_thread_state *kgts; + void *vaddr; + int ctxnum, ncpus; + + /* Trade the read lock held on entry for the write lock */ + up_read(&bs->bs_kgts_sema); + down_write(&bs->bs_kgts_sema); + + /* NOTE(review): the gru_alloc_gts() result is used without a failure check */ + if (!bs->bs_kgts) { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + bs->bs_kgts->ts_user_blade_id = blade_id; + } + kgts = bs->bs_kgts; + + if (!kgts->ts_gru) { + STAT(load_kernel_context); + /* Size the context: per-cpu kernel resources plus any async reservation */ + ncpus = uv_blade_nr_possible_cpus(blade_id); + kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( + GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); + kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( + GRU_NUM_KERNEL_DSR_BYTES * ncpus + + bs->bs_async_dsr_bytes); + /* Retry until a GRU context is assigned, stealing one each time */ + while (!gru_assign_gru_context(kgts)) { + msleep(1); + gru_steal_context(kgts); + } + gru_load_context(kgts); + gru = bs->bs_kgts->ts_gru; + vaddr = gru->gs_gru_base_vaddr; + ctxnum = kgts->ts_ctxnum; + /* Cache the addresses of the first CB and first DSR line of the context */ + bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); + bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); + } + /* Drop back to the read lock the caller expects to hold */ + downgrade_write(&bs->bs_kgts_sema); +} + +/* + * Free all kernel contexts that are not currently in use. + * Returns 0 if all freed, else the number of in-use contexts that + * were skipped because their bs_kgts_sema could not be taken. + */ +static int gru_free_kernel_contexts(void) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int bid, ret = 0; + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) { + bs = gru_base[bid]; + if (!bs) + continue; + + /* Ignore busy contexts. Don't want to block here. */ + if (down_write_trylock(&bs->bs_kgts_sema)) { + kgts = bs->bs_kgts; + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + bs->bs_kgts = NULL; + up_write(&bs->bs_kgts_sema); + kfree(kgts); + } else { + ret++; + } + } + return ret; +} + +/* + * Lock & load the kernel context for the specified blade. + * A negative blade_id selects the blade of the current cpu. + * Returns the blade state with its bs_kgts_sema held for READ. + */ +static struct gru_blade_state *gru_lock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + int bid; + + STAT(lock_kernel_context); +again: + bid = blade_id < 0 ? 
uv_numa_blade_id() : blade_id; + bs = gru_base[bid]; + + /* Handle the case where migration occurred while waiting for the sema */ + down_read(&bs->bs_kgts_sema); + if (blade_id < 0 && bid != uv_numa_blade_id()) { + up_read(&bs->bs_kgts_sema); + goto again; + } + if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) + gru_load_kernel_context(bs, bid); + return bs; + +} + +/* + * Unlock the kernel context for the specified blade. Context is not + * unloaded but may be stolen before next use. + */ +static void gru_unlock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + bs = gru_base[blade_id]; + up_read(&bs->bs_kgts_sema); + STAT(unlock_kernel_context); +} + +/* + * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. + * - returns with preemption disabled + * - always returns 0 + * NOTE(review): gru_lock_kernel_context() takes a semaphore and may reach + * msleep() after preempt_disable() - confirm this is intended. + */ +static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) +{ + struct gru_blade_state *bs; + int lcpu; + + BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); + preempt_disable(); + bs = gru_lock_kernel_context(-1); + lcpu = uv_blade_processor_id(); + *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; + *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; + return 0; +} + +/* + * Free the current cpus reserved DSR/CBR resources. + * The cb and dsr arguments are not used; the context of the current + * cpu's blade is unlocked and preemption is re-enabled. + */ +static void gru_free_cpu_resources(void *cb, void *dsr) +{ + gru_unlock_kernel_context(uv_numa_blade_id()); + preempt_enable(); +} + +/* + * Reserve GRU resources to be used asynchronously. + * Note: currently supports only 1 reservation per blade. 
+ * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * output: + * handle to identify resource + * (0 = async resources already reserved) + */ +unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int ret = 0; + + bs = gru_base[blade_id]; + + down_write(&bs->bs_kgts_sema); + + /* Verify no resources already reserved */ + if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) + goto done; + bs->bs_async_dsr_bytes = dsr_bytes; + bs->bs_async_cbrs = cbrs; + bs->bs_async_wq = cmp; + kgts = bs->bs_kgts; + + /* Resources changed. Unload context if already loaded */ + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + ret = ASYNC_BID_TO_HAN(blade_id); + +done: + up_write(&bs->bs_kgts_sema); + return ret; +} + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +void gru_release_async_resources(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + down_write(&bs->bs_kgts_sema); + bs->bs_async_dsr_bytes = 0; + bs->bs_async_cbrs = 0; + bs->bs_async_wq = NULL; + up_write(&bs->bs_kgts_sema); +} + +/* + * Wait for async GRU instructions to complete. 
+ * + * input: + * han - handle to identify resources + */ +void gru_wait_async_cbr(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + wait_for_completion(bs->bs_async_wq); + mb(); +} + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + int blade_id = ASYNC_HAN_TO_BID(han); + int ncpus; + + gru_lock_kernel_context(blade_id); + ncpus = uv_blade_nr_possible_cpus(blade_id); + if (cb) + *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; + if (dsr) + *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; +} + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +void gru_unlock_async_resource(unsigned long han) +{ + int blade_id = ASYNC_HAN_TO_BID(han); + + gru_unlock_kernel_context(blade_id); +} + +/*----------------------------------------------------------------------*/ +int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet) +{ + struct gru_control_block_extended *cbe; + struct gru_thread_state *kgts = NULL; + unsigned long off; + int cbrnum, bid; + + /* + * Locate kgts for cb. This algorithm is SLOW but + * this function is rarely called (ie., almost never). + * Performance does not matter. 
+ */ + for_each_possible_blade(bid) { + if (!gru_base[bid]) + break; + kgts = gru_base[bid]->bs_kgts; + if (!kgts || !kgts->ts_gru) + continue; + off = cb - kgts->ts_gru->gs_gru_base_vaddr; + if (off < GRU_SIZE) + break; + kgts = NULL; + } + BUG_ON(!kgts); + cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); + cbe = get_cbe(GRUBASE(cb), cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); + excdet->opc = cbe->opccpy; + excdet->exopc = cbe->exopccpy; + excdet->ecause = cbe->ecause; + excdet->exceptdet0 = cbe->idef1upd; + excdet->exceptdet1 = cbe->idef3upd; + gru_flush_cache(cbe); + return 0; +} + +char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + + if (ret > 0 && gen->istatus == CBS_EXCEPTION) { + gru_get_cb_exception_detail(cb, &excdet); + snprintf(buf, size, + "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), + gen, excdet.opc, excdet.exopc, excdet.ecause, + excdet.exceptdet0, excdet.exceptdet1); + } else { + snprintf(buf, size, "No exception"); + } + return buf; +} + +static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) +{ + while (gen->istatus >= CBS_ACTIVE) { + cpu_relax(); + barrier(); + } + return gen->istatus; +} + +static int gru_retry_exception(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + int retry = EXCEPTION_RETRY_LIMIT; + + while (1) { + if (gru_wait_idle_or_exception(gen) == CBS_IDLE) + return CBS_IDLE; + if (gru_get_cb_message_queue_substatus(cb)) + return CBS_EXCEPTION; + gru_get_cb_exception_detail(cb, &excdet); + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) + break; + if (retry-- == 0) + break; + gen->icmd = 1; + gru_flush_cache(gen); + } + return CBS_EXCEPTION; +} + +int 
gru_check_status_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gen->istatus; + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; + +} + +int gru_wait_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gru_wait_idle_or_exception(gen); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; +} + +void gru_abort(int ret, void *cb, char *str) +{ + char buf[GRU_EXC_STR_SIZE]; + + panic("GRU FATAL ERROR: %s - %s\n", str, + gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); +} + +void gru_wait_abort_proc(void *cb) +{ + int ret; + + ret = gru_wait_proc(cb); + if (ret) + gru_abort(ret, cb, "gru_wait_abort"); +} + + +/*------------------------------ MESSAGE QUEUES -----------------------------*/ + +/* Internal status . These are NOT returned to the user. */ +#define MQIE_AGAIN -1 /* try again */ + + +/* + * Save/restore the "present" flag that is in the second line of 2-line + * messages + */ +static inline int get_present2(void *p) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + return mhdr->present; +} + +static inline void restore_present2(void *p, int val) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + mhdr->present = val; +} + +/* + * Create a message queue. + * qlines - message queue size in cache lines. Includes 2-line header. 
+ */ +int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid) +{ + struct message_queue *mq = p; + unsigned int qlines; + + qlines = bytes / GRU_CACHE_LINE_BYTES - 2; + memset(mq, 0, bytes); + mq->start = &mq->data; + mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; + mq->next = &mq->data; + mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; + mq->qlines = qlines; + mq->hstatus[0] = 0; + mq->hstatus[1] = 1; + mq->head = gru_mesq_head(2, qlines / 2 + 1); + mqd->mq = mq; + mqd->mq_gpa = uv_gpa(mq); + mqd->qlines = qlines; + mqd->interrupt_pnode = nasid >> 1; + mqd->interrupt_vector = vector; + mqd->interrupt_apicid = apicid; + return 0; +} +EXPORT_SYMBOL_GPL(gru_create_message_queue); + +/* + * Send a NOOP message to a message queue + * Returns: + * 0 - if queue is full after the send. This is the normal case + * but various races can change this. + * -1 - if mesq sent successfully but queue not full + * >0 - unexpected error. 
MQE_xxx returned + */ +static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, + void *mesg) +{ + const struct message_header noop_header = { + .present = MQS_NOOP, .lines = 1}; + unsigned long m; + int substatus, ret; + struct message_header save_mhdr, *mhdr = mesg; + + STAT(mesq_noop); + save_mhdr = *mhdr; + *mhdr = noop_header; + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); + ret = gru_wait(cb); + + if (ret) { + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_noop_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_noop_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_noop_qlimit_reached); + ret = 0; + break; + case CBSS_AMO_NACKED: + STAT(mesq_noop_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_noop_put_nacked); + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, + IMA); + if (gru_wait(cb) == CBS_IDLE) + ret = MQIE_AGAIN; + else + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_noop_page_overflow); + /* fallthru */ + default: + BUG(); + } + } + *mhdr = save_mhdr; + return ret; +} + +/* + * Handle a gru_mesq full. 
+ */ +static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + union gru_mesqhead mqh; + unsigned int limit, head; + unsigned long avalue; + int half, qlines; + + /* Determine if switching to first/second half of q */ + avalue = gru_get_amo_value(cb); + head = gru_get_amo_value_head(cb); + limit = gru_get_amo_value_limit(cb); + + qlines = mqd->qlines; + half = (limit != qlines); + + if (half) + mqh = gru_mesq_head(qlines / 2 + 1, qlines); + else + mqh = gru_mesq_head(2, qlines / 2 + 1); + + /* Try to get lock for switching head pointer */ + gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + if (!gru_get_amo_value(cb)) { + STAT(mesq_qf_locked); + return MQE_QUEUE_FULL; + } + + /* Got the lock. Send optional NOP if queue not full, */ + if (head != limit) { + if (send_noop_message(cb, mqd, mesg)) { + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), + XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + STAT(mesq_qf_noop_not_full); + return MQIE_AGAIN; + } + avalue++; + } + + /* Then flip queuehead to other half of queue. */ + gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, + IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + + /* If not successfully in swapping queue head, clear the hstatus lock */ + if (gru_get_amo_value(cb) != avalue) { + STAT(mesq_qf_switch_head_failed); + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, + IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + } + return MQIE_AGAIN; +cberr: + STAT(mesq_qf_unexpected_error); + return MQE_UNEXPECTED_CB_ERR; +} + +/* + * Handle a PUT failure. Note: if message was a 2-line message, one of the + * lines might have successfully have been written. Before sending the + * message, "present" must be cleared in BOTH lines to prevent the receiver + * from prematurely seeing the full message. 
+ */ +static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + unsigned long m, *val = mesg, gpa, save; + int ret; + + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + if (lines == 2) { + gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + } + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + + if (!mqd->interrupt_vector) + return MQE_OK; + + /* + * Send a cross-partition interrupt to the SSI that contains the target + * message queue. Normally, the interrupt is automatically delivered by + * hardware but some error conditions require explicit delivery. + * Use the GRU to deliver the interrupt. Otherwise partition failures + * could cause unrecovered errors. + */ + gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT); + save = *val; + *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector, + dest_Fixed); + gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA); + ret = gru_wait(cb); + *val = save; + if (ret != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + return MQE_OK; +} + +/* + * Handle a gru_mesq failure. Some of these failures are software recoverable + * or retryable. 
+ */ +static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + int substatus, ret = 0; + + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_send_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_send_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_send_qlimit_reached); + ret = send_message_queue_full(cb, mqd, mesg, lines); + break; + case CBSS_AMO_NACKED: + STAT(mesq_send_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_send_put_nacked); + ret = send_message_put_nacked(cb, mqd, mesg, lines); + break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_page_overflow); + /* fallthru */ + default: + BUG(); + } + return ret; +} + +/* + * Send a message to a message queue + * mqd message queue descriptor + * mesg message. ust be vaddr within a GSEG + * bytes message size (<= 2 CL) + */ +int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, + unsigned int bytes) +{ + struct message_header *mhdr; + void *cb; + void *dsr; + int istatus, clines, ret; + + STAT(mesq_send); + BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); + + clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); + if (gru_get_cpu_resources(bytes, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + memcpy(dsr, mesg, bytes); + mhdr = dsr; + mhdr->present = MQS_FULL; + mhdr->lines = clines; + if (clines == 2) { + mhdr->present2 = get_present2(mhdr); + restore_present2(mhdr, MQS_FULL); + } + + do { + ret = MQE_OK; + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); + istatus = gru_wait(cb); + if (istatus != CBS_IDLE) + ret = send_message_failure(cb, mqd, dsr, clines); + } while (ret == MQIE_AGAIN); + gru_free_cpu_resources(cb, dsr); + + if (ret) + STAT(mesq_send_failed); + return ret; +} +EXPORT_SYMBOL_GPL(gru_send_message_gpa); + +/* + * Advance the receive 
pointer for the queue to the next message. + */ +void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + void *next, *pnext; + int half = -1; + int lines = mhdr->lines; + + if (lines == 2) + restore_present2(mhdr, MQS_EMPTY); + mhdr->present = MQS_EMPTY; + + pnext = mq->next; + next = pnext + GRU_CACHE_LINE_BYTES * lines; + if (next == mq->limit) { + next = mq->start; + half = 1; + } else if (pnext < mq->start2 && next >= mq->start2) { + half = 0; + } + + if (half >= 0) + mq->hstatus[half] = 1; + mq->next = next; +} +EXPORT_SYMBOL_GPL(gru_free_message); + +/* + * Get next message from message queue. Return NULL if no message + * present. User must call next_message() to move to next message. + * rmq message queue + */ +void *gru_get_next_message(struct gru_message_queue_desc *mqd) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + int present = mhdr->present; + + /* skip NOOP messages */ + while (present == MQS_NOOP) { + gru_free_message(mqd, mhdr); + mhdr = mq->next; + present = mhdr->present; + } + + /* Wait for both halves of 2 line messages */ + if (present == MQS_FULL && mhdr->lines == 2 && + get_present2(mhdr) == MQS_EMPTY) + present = MQS_EMPTY; + + if (!present) { + STAT(mesq_receive_none); + return NULL; + } + + if (mhdr->lines == 2) + restore_present2(mhdr, mhdr->present2); + + STAT(mesq_receive); + return mhdr; +} +EXPORT_SYMBOL_GPL(gru_get_next_message); + +/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ + +/* + * Load a DW from a global GPA. The GPA can be a memory or MMR address. 
+ */ +int gru_read_gpa(unsigned long *value, unsigned long gpa) +{ + void *cb; + void *dsr; + int ret, iaa; + + STAT(read_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + iaa = gpa >> 62; + gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); + ret = gru_wait(cb); + if (ret == CBS_IDLE) + *value = *(unsigned long *)dsr; + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_read_gpa); + + +/* + * Copy a block of data using the GRU resources + */ +int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes) +{ + void *cb; + void *dsr; + int ret; + + STAT(copy_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), + XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); + ret = gru_wait(cb); + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_copy_gpa); + +/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ +/* Temp - will delete after we gain confidence in the GRU */ + +static int quicktest0(unsigned long arg) +{ + unsigned long word0; + unsigned long word1; + void *cb; + void *dsr; + unsigned long *p; + int ret = -EIO; + + if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + p = dsr; + word0 = MAGIC; + word1 = 0; + + gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); + goto done; + } + + if (*p != MAGIC) { + printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); + goto done; + } + gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); + goto done; + } + + if (word0 != word1 || word1 != MAGIC) { + 
printk(KERN_DEBUG + "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", + smp_processor_id(), word1, MAGIC); + goto done; + } + ret = 0; + +done: + gru_free_cpu_resources(cb, dsr); + return ret; +} + +#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) + +static int quicktest1(unsigned long arg) +{ + struct gru_message_queue_desc mqd; + void *p, *mq; + unsigned long *dw; + int i, ret = -EIO; + char mes[GRU_CACHE_LINE_BYTES], *m; + + /* Need 1K cacheline aligned that does not cross page boundary */ + p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; + mq = ALIGNUP(p, 1024); + memset(mes, 0xee, sizeof(mes)); + dw = mq; + + gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); + for (i = 0; i < 6; i++) { + mes[8] = i; + do { + ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); + } while (ret == MQE_CONGESTION); + if (ret) + break; + } + if (ret != MQE_QUEUE_FULL || i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", + smp_processor_id(), ret, i); + goto done; + } + + for (i = 0; i < 6; i++) { + m = gru_get_next_message(&mqd); + if (!m || m[8] != i) + break; + gru_free_message(&mqd, m); + } + if (i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", + smp_processor_id(), i, m, m ? 
m[8] : -1); + goto done; + } + ret = 0; + +done: + kfree(p); + return ret; +} + +static int quicktest2(unsigned long arg) +{ + static DECLARE_COMPLETION(cmp); + unsigned long han; + int blade_id = 0; + int numcb = 4; + int ret = 0; + unsigned long *buf; + void *cb0, *cb; + struct gru_control_block_status *gen; + int i, k, istatus, bytes; + + bytes = numcb * 4 * 8; + buf = kmalloc(bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = -EBUSY; + han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); + if (!han) + goto done; + + gru_lock_async_resource(han, &cb0, NULL); + memset(buf, 0xee, bytes); + for (i = 0; i < numcb; i++) + gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, + XTYPE_DW, 4, 1, IMA_INTERRUPT); + + ret = 0; + k = numcb; + do { + gru_wait_async_cbr(han); + for (i = 0; i < numcb; i++) { + cb = cb0 + i * GRU_HANDLE_STRIDE; + istatus = gru_check_status(cb); + if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) + break; + } + if (i == numcb) + continue; + if (istatus != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); + ret = -EFAULT; + } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || + buf[4 * i + 3]) { + printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", + smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); + ret = -EIO; + } + k--; + gen = cb; + gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ + } while (k); + BUG_ON(cmp.done); + + gru_unlock_async_resource(han); + gru_release_async_resources(han); +done: + kfree(buf); + return ret; +} + +#define BUFSIZE 200 +static int quicktest3(unsigned long arg) +{ + char buf1[BUFSIZE], buf2[BUFSIZE]; + int ret = 0; + + memset(buf2, 0, sizeof(buf2)); + memset(buf1, get_cycles() & 255, sizeof(buf1)); + gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); + if (memcmp(buf1, buf2, BUFSIZE)) { + printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); + ret 
= -EIO; + } + return ret; +} + +/* + * Debugging only. User hook for various kernel tests + * of driver & gru. + */ +int gru_ktest(unsigned long arg) +{ + int ret = -EINVAL; + + switch (arg & 0xff) { + case 0: + ret = quicktest0(arg); + break; + case 1: + ret = quicktest1(arg); + break; + case 2: + ret = quicktest2(arg); + break; + case 3: + ret = quicktest3(arg); + break; + case 99: + ret = gru_free_kernel_contexts(); + break; + } + return ret; + +} + +int gru_kservices_init(void) +{ + return 0; +} + +void gru_kservices_exit(void) +{ + if (gru_free_kernel_contexts()) + BUG(); +} + diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h new file mode 100644 index 00000000000..02aa94d8484 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.h @@ -0,0 +1,214 @@ + +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __GRU_KSERVICES_H_ +#define __GRU_KSERVICES_H_ + + +/* + * Message queues using the GRU to send/receive messages. + * + * These function allow the user to create a message queue for + * sending/receiving 1 or 2 cacheline messages using the GRU. + * + * Processes SENDING messages will use a kernel CBR/DSR to send + * the message. This is transparent to the caller. 
 *
 * The receiver does not use any GRU resources.
 *
 * The functions support:
 * 	- single receiver
 * 	- multiple senders
 *	- cross partition message
 *
 * Missing features ZZZ:
 * 	- user options for dealing with timeouts, queue full, etc.
 * 	- gru_create_message_queue() needs interrupt vector info
 */

/* Descriptor of a message queue; filled in by gru_create_message_queue(). */
struct gru_message_queue_desc {
	void		*mq;			/* message queue vaddress */
	unsigned long	mq_gpa;			/* global address of mq */
	int		qlines;			/* queue size in CL */
	int		interrupt_vector;	/* interrupt vector */
	int		interrupt_pnode;	/* pnode for interrupt */
	int		interrupt_apicid;	/* lapicid for interrupt */
};

/*
 * Initialize a user allocated chunk of memory to be used as
 * a message queue. The caller must ensure that the queue is
 * in contiguous physical memory and is cacheline aligned.
 *
 * Message queue size is the total number of bytes allocated
 * to the queue including a 2 cacheline header that is used
 * to manage the queue.
 *
 *  Input:
 * 	mqd	pointer to message queue descriptor
 * 	p	pointer to user allocated mesq memory.
 * 	bytes	size of message queue in bytes
 *	nasid	nasid of blade where interrupt is delivered
 *	vector	interrupt vector (zero if no interrupts)
 *	apicid	apicid of cpu for interrupt
 *
 *  Errors:
 *  	0	OK
 *  	>0	error
 */
extern int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid);

/*
 * Send a message to a message queue.
 *
 * Note: The message queue transport mechanism uses the first 32
 * bits of the message. Users should avoid using these bits.
 *
 *
 *   Input:
 * 	mqd	pointer to message queue descriptor
 * 	mesg	pointer to message.
 * 		Must be 64-bit aligned
 * 	bytes	size of message in bytes
 *
 *   Output:
 *      0	message sent
 *     >0	Send failure - see error codes below
 *
 */
extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd,
				void *mesg, unsigned int bytes);

/* Status values for gru_send_message_gpa() */
#define MQE_OK			0	/* message sent successfully */
#define MQE_CONGESTION		1	/* temporary congestion, try again */
#define MQE_QUEUE_FULL		2	/* queue is full */
#define MQE_UNEXPECTED_CB_ERR	3	/* unexpected CB error */
#define MQE_PAGE_OVERFLOW	10	/* BUG - queue overflowed a page */
#define MQE_BUG_NO_RESOURCES	11	/* BUG - could not alloc GRU cb/dsr */

/*
 * Advance the receive pointer for the message queue to the next message.
 * Note: current API requires messages to be gotten & freed in order. Future
 * API extensions may allow for out-of-order freeing.
 *
 *   Input
 * 	mqd	pointer to message queue descriptor
 * 	mesq	message being freed
 */
extern void gru_free_message(struct gru_message_queue_desc *mqd,
			     void *mesq);

/*
 * Get next message from message queue. Returns pointer to
 * message OR NULL if no message present.
 * User must call gru_free_message() after message is processed
 * in order to move the queue pointers to next message.
 *
 *   Input
 * 	mqd	pointer to message queue descriptor
 *
 *   Output:
 *	p	pointer to message
 *	NULL	no message available
 */
extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);


/*
 * Read a GRU global GPA. Source can be located in a remote partition.
 *
 *    Input:
 *    	value		memory address where MMR value is returned
 *    	gpa		source numalink physical address of GPA
 *
 *    Output:
 *	0		OK
 *	>0		error
 */
int gru_read_gpa(unsigned long *value, unsigned long gpa);


/*
 * Copy data using the GRU. Source or destination can be located in a remote
 * partition.
 *
 *    Input:
 *    	dest_gpa	destination global physical address
 *    	src_gpa		source global physical address
 *    	bytes		number of bytes to copy
 *
 *    Output:
 *	0		OK
 *	>0		error
 */
extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
							unsigned int bytes);

/*
 * Reserve GRU resources to be used asynchronously.
 *
 * 	input:
 * 		blade_id  - blade on which resources should be reserved
 * 		cbrs	  - number of CBRs
 * 		dsr_bytes - number of DSR bytes needed
 * 		cmp	  - completion structure for waiting for
 * 			    async completions
 *	output:
 *		handle to identify resource
 *		(0 = no resources)
 */
extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
				struct completion *cmp);

/*
 * Release async resources previously reserved.
 *
 *	input:
 *		han - handle to identify resources
 */
extern void gru_release_async_resources(unsigned long han);

/*
 * Wait for async GRU instructions to complete.
 *
 *	input:
 *		han - handle to identify resources
 */
extern void gru_wait_async_cbr(unsigned long han);

/*
 * Lock previous reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 *	output:
 *		cb  - pointer to first CBR
 *		dsr - pointer to first DSR
 */
extern void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr);

/*
 * Unlock previous reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 */
extern void gru_unlock_async_resource(unsigned long han);

#endif 		/* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 00000000000..e77d1b1f9d0
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,153 @@
/*
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 2.1 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef __GRULIB_H__
#define __GRULIB_H__

/* Device name, device node path and the ioctl type character */
#define GRU_BASENAME		"gru"
#define GRU_FULLNAME		"/dev/gru"
#define GRU_IOCTL_NUM 		 'G'

/*
 * Maximum number of GRU segments that a user can have open
 * ZZZ temp - set high for testing. Revisit.
 */
#define GRU_MAX_OPEN_CONTEXTS		32

/* Set Number of Request Blocks */
#define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)

/*  Set Context Options */
#define GRU_SET_CONTEXT_OPTION		_IOWR(GRU_IOCTL_NUM, 4, void *)

/* Fetch exception detail */
#define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)

/* For user call_os handling - normally a TLB fault */
#define GRU_USER_CALL_OS		_IOWR(GRU_IOCTL_NUM, 8, void *)

/* For user unload context */
#define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)

/* For dumping GRU chiplet state */
#define GRU_DUMP_CHIPLET_STATE		_IOWR(GRU_IOCTL_NUM, 11, void *)

/* For getting gseg statistics */
#define GRU_GET_GSEG_STATISTICS		_IOWR(GRU_IOCTL_NUM, 12, void *)

/* For user TLB flushing (primarily for tests) */
#define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)

/* Get some config options (primarily for tests & emulator) */
#define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)

/* Various kernel self-tests */
#define GRU_KTEST			_IOWR(GRU_IOCTL_NUM, 52, void *)

/* Address helpers; every thread window is GRU_GSEG_PAGESIZE bytes */
#define CONTEXT_WINDOW_BYTES(th)        (GRU_GSEG_PAGESIZE * (th))
#define THREAD_POINTER(p, th)		(p + GRU_GSEG_PAGESIZE * (th))
#define GSEG_START(cb)			((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))

/*
 * Structure used to pass gseg statistics requests to the driver
 */
struct gru_get_gseg_statistics_req {
	unsigned long			gseg;
	struct gru_gseg_statistics	stats;
};

/*
 * Structure used to pass create context parameters to the driver
 */
struct gru_create_context_req {
	unsigned long		gseg;
	unsigned int		data_segment_bytes;
	unsigned int		control_blocks;
	unsigned int		maximum_thread_count;
	unsigned int		options;
	unsigned char		tlb_preload_count;
};

/*
 * Structure used to pass unload context parameters to the driver
 */
struct gru_unload_context_req {
	unsigned long	gseg;
};

/*
 * Structure used to set context options
 */
enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
struct gru_set_context_option_req {
	unsigned long	gseg;
	int		op;
	int		val0;
	long		val1;
};

/*
 * Structure used to pass TLB flush parameters to the driver
 */
struct gru_flush_tlb_req {
	unsigned long	gseg;
	unsigned long	vaddr;
	size_t		len;
};

/*
 * Structure used to pass dump chiplet state parameters to the driver
 */
enum {dcs_pid, dcs_gid};
struct gru_dump_chiplet_state_req {
	unsigned int	op;
	unsigned int	gid;
	int		ctxnum;
	char		data_opt;
	char		lock_cch;
	char		flush_cbrs;
	char		fill[10];
	pid_t		pid;
	void		*buf;
	size_t		buflen;
	/* ---- output --- */
	unsigned int	num_contexts;
};

/* Header of one dumped context; context data follows in data[] */
#define GRU_DUMP_MAGIC	0x3474ab6c
struct gru_dump_context_header {
	unsigned int	magic;
	unsigned int	gid;
	unsigned char	ctxnum;
	unsigned char	cbrcnt;
	unsigned char	dsrcnt;
	pid_t		pid;
	unsigned long	vaddr;
	int		cch_locked;
	unsigned long	data[0];
};

/*
 * GRU configuration info (temp - for testing)
 */
struct gru_config_info {
	int		cpus;
	int		blades;
	int		nodes;
	int		chiplets;
	int		fill[16];
};

#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 00000000000..f8538bbd0bf
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,972 @@
/*
 * SN Platform GRU Driver
 *
 *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/err.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" + +unsigned long gru_options __read_mostly; + +static struct device_driver gru_driver = { + .name = "gru" +}; + +static struct device gru_device = { + .init_name = "", + .driver = &gru_driver, +}; + +struct device *grudev = &gru_device; + +/* + * Select a gru fault map to be used by the current cpu. Note that + * multiple cpus may be using the same map. + * ZZZ should be inline but did not work on emulator + */ +int gru_cpu_fault_map_id(void) +{ +#ifdef CONFIG_IA64 + return uv_blade_processor_id() % GRU_NUM_TFM; +#else + int cpu = smp_processor_id(); + int id, core; + + core = uv_cpu_core_number(cpu); + id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + return id; +#endif +} + +/*--------- ASID Management ------------------------------------------- + * + * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. + * Once MAX is reached, flush the TLB & start over. However, + * some asids may still be in use. There won't be many (percentage wise) still + * in use. Search active contexts & determine the value of the first + * asid in use ("x"s below). Set "limit" to this value. + * This defines a block of assignable asids. + * + * When "limit" is reached, search forward from limit+1 and determine the + * next block of assignable asids. + * + * Repeat until MAX_ASID is reached, then start over again. + * + * Each time MAX_ASID is reached, increment the asid generation. 
Since + * the search for in-use asids only checks contexts with GRUs currently + * assigned, asids in some contexts will be missed. Prior to loading + * a context, the asid generation of the GTS asid is rechecked. If it + * doesn't match the current generation, a new asid will be assigned. + * + * 0---------------x------------x---------------------x----| + * ^-next ^-limit ^-MAX_ASID + * + * All asid manipulation & context loading/unloading is protected by the + * gs_lock. + */ + +/* Hit the asid limit. Start over */ +static int gru_wrap_asid(struct gru_state *gru) +{ + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + STAT(asid_wrap); + gru->gs_asid_gen++; + return MIN_ASID; +} + +/* Find the next chunk of unused asids */ +static int gru_reset_asid_limit(struct gru_state *gru, int asid) +{ + int i, gid, inuse_asid, limit; + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + STAT(asid_next); + limit = MAX_ASID; + if (asid >= limit) + asid = gru_wrap_asid(gru); + gru_flush_all_tlb(gru); + gid = gru->gs_gid; +again: + for (i = 0; i < GRU_NUM_CCH; i++) { + if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i])) + continue; + inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; + gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n", + gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms, + inuse_asid, i); + if (inuse_asid == asid) { + asid += ASID_INC; + if (asid >= limit) { + /* + * empty range: reset the range limit and + * start over + */ + limit = MAX_ASID; + if (asid >= MAX_ASID) + asid = gru_wrap_asid(gru); + goto again; + } + } + + if ((inuse_asid > asid) && (inuse_asid < limit)) + limit = inuse_asid; + } + gru->gs_asid_limit = limit; + gru->gs_asid = asid; + gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid, + asid, limit); + return asid; +} + +/* Assign a new ASID to a thread context. 
*/ +static int gru_assign_asid(struct gru_state *gru) +{ + int asid; + + gru->gs_asid += ASID_INC; + asid = gru->gs_asid; + if (asid >= gru->gs_asid_limit) + asid = gru_reset_asid_limit(gru, asid); + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + return asid; +} + +/* + * Clear n bits in a word. Return a word indicating the bits that were cleared. + * Optionally, build an array of chars that contain the bit numbers allocated. + */ +static unsigned long reserve_resources(unsigned long *p, int n, int mmax, + char *idx) +{ + unsigned long bits = 0; + int i; + + while (n--) { + i = find_first_bit(p, mmax); + if (i == mmax) + BUG(); + __clear_bit(i, p); + __set_bit(i, &bits); + if (idx) + *idx++ = i; + } + return bits; +} + +unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, + char *cbmap) +{ + return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, + cbmap); +} + +unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, + char *dsmap) +{ + return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, + dsmap); +} + +static void reserve_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts++; + gts->ts_cbr_map = + gru_reserve_cb_resources(gru, gts->ts_cbr_au_count, + gts->ts_cbr_idx); + gts->ts_dsr_map = + gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL); +} + +static void free_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts--; + gru->gs_cbr_map |= gts->ts_cbr_map; + gru->gs_dsr_map |= gts->ts_dsr_map; +} + +/* + * Check if a GRU has sufficient free resources to satisfy an allocation + * request. Note: GRU locks may or may not be held when this is called. If + * not held, recheck after acquiring the appropriate locks. 
+ * + * Returns 1 if sufficient resources, 0 if not + */ +static int check_gru_resources(struct gru_state *gru, int cbr_au_count, + int dsr_au_count, int max_active_contexts) +{ + return hweight64(gru->gs_cbr_map) >= cbr_au_count + && hweight64(gru->gs_dsr_map) >= dsr_au_count + && gru->gs_active_contexts < max_active_contexts; +} + +/* + * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG + * context. + */ +static int gru_load_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) +{ + struct gru_mm_struct *gms = gts->ts_gms; + struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; + unsigned short ctxbitmap = (1 << gts->ts_ctxnum); + int asid; + + spin_lock(&gms->ms_asid_lock); + asid = asids->mt_asid; + + spin_lock(&gru->gs_asid_lock); + if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen != + gru->gs_asid_gen)) { + asid = gru_assign_asid(gru); + asids->mt_asid = asid; + asids->mt_asid_gen = gru->gs_asid_gen; + STAT(asid_new); + } else { + STAT(asid_reuse); + } + spin_unlock(&gru->gs_asid_lock); + + BUG_ON(asids->mt_ctxbitmap & ctxbitmap); + asids->mt_ctxbitmap |= ctxbitmap; + if (!test_bit(gru->gs_gid, gms->ms_asidmap)) + __set_bit(gru->gs_gid, gms->ms_asidmap); + spin_unlock(&gms->ms_asid_lock); + + gru_dbg(grudev, + "gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, asid, + gms->ms_asidmap[0]); + return asid; +} + +static void gru_unload_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) +{ + struct gru_mm_struct *gms = gts->ts_gms; + struct gru_mm_tracker *asids; + unsigned short ctxbitmap; + + asids = &gms->ms_asids[gru->gs_gid]; + ctxbitmap = (1 << gts->ts_ctxnum); + spin_lock(&gms->ms_asid_lock); + spin_lock(&gru->gs_asid_lock); + BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); + asids->mt_ctxbitmap ^= ctxbitmap; + gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, 
gms->ms_asidmap[0]); + spin_unlock(&gru->gs_asid_lock); + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Decrement the reference count on a GTS structure. Free the structure + * if the reference count goes to zero. + */ +void gts_drop(struct gru_thread_state *gts) +{ + if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { + if (gts->ts_gms) + gru_drop_mmu_notifier(gts->ts_gms); + kfree(gts); + STAT(gts_free); + } +} + +/* + * Locate the GTS structure for the current thread. + */ +static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data + *vdata, int tsid) +{ + struct gru_thread_state *gts; + + list_for_each_entry(gts, &vdata->vd_head, ts_next) + if (gts->ts_tsid == tsid) + return gts; + return NULL; +} + +/* + * Allocate a thread state structure. + */ +struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid) +{ + struct gru_thread_state *gts; + struct gru_mm_struct *gms; + int bytes; + + bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); + bytes += sizeof(struct gru_thread_state); + gts = kmalloc(bytes, GFP_KERNEL); + if (!gts) + return ERR_PTR(-ENOMEM); + + STAT(gts_alloc); + memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ + atomic_set(>s->ts_refcnt, 1); + mutex_init(>s->ts_ctxlock); + gts->ts_cbr_au_count = cbr_au_count; + gts->ts_dsr_au_count = dsr_au_count; + gts->ts_tlb_preload_count = tlb_preload_count; + gts->ts_user_options = options; + gts->ts_user_blade_id = -1; + gts->ts_user_chiplet_id = -1; + gts->ts_tsid = tsid; + gts->ts_ctxnum = NULLCTX; + gts->ts_tlb_int_select = -1; + gts->ts_cch_req_slice = -1; + gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT); + if (vma) { + gts->ts_mm = current->mm; + gts->ts_vma = vma; + gms = gru_register_mmu_notifier(); + if (IS_ERR(gms)) + goto err; + gts->ts_gms = gms; + } + + gru_dbg(grudev, "alloc gts %p\n", gts); + return gts; + +err: + gts_drop(gts); + return 
ERR_CAST(gms); +} + +/* + * Allocate a vma private data structure. + */ +struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) +{ + struct gru_vma_data *vdata = NULL; + + vdata = kmalloc(sizeof(*vdata), GFP_KERNEL); + if (!vdata) + return NULL; + + STAT(vdata_alloc); + INIT_LIST_HEAD(&vdata->vd_head); + spin_lock_init(&vdata->vd_lock); + gru_dbg(grudev, "alloc vdata %p\n", vdata); + return vdata; +} + +/* + * Find the thread state structure for the current thread. + */ +struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts; + + spin_lock(&vdata->vd_lock); + gts = gru_find_current_gts_nolock(vdata, tsid); + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Allocate a new thread state for a GSEG. Note that races may allow + * another thread to race to create a gts. + */ +struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts, *ngts; + + gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, + vdata->vd_dsr_au_count, + vdata->vd_tlb_preload_count, + vdata->vd_user_options, tsid); + if (IS_ERR(gts)) + return gts; + + spin_lock(&vdata->vd_lock); + ngts = gru_find_current_gts_nolock(vdata, tsid); + if (ngts) { + gts_drop(gts); + gts = ngts; + STAT(gts_double_allocate); + } else { + list_add(>s->ts_next, &vdata->vd_head); + } + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Free the GRU context assigned to the thread state. 
+ */ +static void gru_free_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + gru = gts->ts_gru; + gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid); + + spin_lock(&gru->gs_lock); + gru->gs_gts[gts->ts_ctxnum] = NULL; + free_gru_resources(gru, gts); + BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0); + __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); + gts->ts_ctxnum = NULLCTX; + gts->ts_gru = NULL; + gts->ts_blade = -1; + spin_unlock(&gru->gs_lock); + + gts_drop(gts); + STAT(free_context); +} + +/* + * Prefetching cachelines help hardware performance. + * (Strictly a performance enhancement. Not functionally required). + */ +static void prefetch_data(void *p, int num, int stride) +{ + while (num-- > 0) { + prefetchw(p); + p += stride; + } +} + +static inline long gru_copy_handle(void *d, void *s) +{ + memcpy(d, s, GRU_HANDLE_BYTES); + return GRU_HANDLE_BYTES; +} + +static void gru_prefetch_context(void *gseg, void *cb, void *cbe, + unsigned long cbrmap, unsigned long length) +{ + int i, scr; + + prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, + GRU_CACHE_LINE_BYTES); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); + prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, + GRU_CACHE_LINE_BYTES); + cb += GRU_HANDLE_STRIDE; + } +} + +static void gru_load_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap, + int data_valid) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + if (data_valid) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, + save); + } else { + memset(cb, 0, GRU_CACHE_LINE_BYTES); + 
memset(cbe + i * GRU_HANDLE_STRIDE, 0, + GRU_CACHE_LINE_BYTES); + } + /* Flush CBE to hide race in context restart */ + mb(); + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + + if (data_valid) + memcpy(gseg + GRU_DS_BASE, save, length); + else + memset(gseg + GRU_DS_BASE, 0, length); +} + +static void gru_unload_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + + /* CBEs may not be coherent. Flush them from cache */ + for_each_cbr_in_allocation_map(i, &cbrmap, scr) + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + mb(); /* Let the CL flush complete */ + + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + save += gru_copy_handle(save, cb); + save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + memcpy(save, gseg + GRU_DS_BASE, length); +} + +void gru_unload_context(struct gru_thread_state *gts, int savestate) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int ctxnum = gts->ts_ctxnum; + + if (!is_kernel_context(gts)) + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", + gts, gts->ts_cbr_map, gts->ts_dsr_map); + lock_cch_handle(cch); + if (cch_interrupt_sync(cch)) + BUG(); + + if (!is_kernel_context(gts)) + gru_unload_mm_tracker(gru, gts); + if (savestate) { + gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, + ctxnum, gts->ts_cbr_map, + gts->ts_dsr_map); + gts->ts_data_valid = 1; + } + + if (cch_deallocate(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_free_gru_context(gts); +} + +/* + * Load a 
GRU context by copying it from the thread data structure in memory + * to the GRU. + */ +void gru_load_context(struct gru_thread_state *gts) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int i, err, asid, ctxnum = gts->ts_ctxnum; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + lock_cch_handle(cch); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch->tlb_int_enable) { + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gts->ts_tlb_int_select; + } + if (gts->ts_cch_req_slice >= 0) { + cch->req_slice_set_enable = 1; + cch->req_slice = gts->ts_cch_req_slice; + } else { + cch->req_slice_set_enable =0; + } + cch->tfm_done_bit_enable = 0; + cch->dsr_allocation_map = gts->ts_dsr_map; + cch->cbr_allocation_map = gts->ts_cbr_map; + + if (is_kernel_context(gts)) { + cch->unmap_enable = 1; + cch->tfm_done_bit_enable = 1; + cch->cb_int_enable = 1; + cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */ + } else { + cch->unmap_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->cb_int_enable = 0; + asid = gru_load_mm_tracker(gru, gts); + for (i = 0; i < 8; i++) { + cch->asid[i] = asid + i; + cch->sizeavail[i] = gts->ts_sizeavail; + } + } + + err = cch_allocate(cch); + if (err) { + gru_dbg(grudev, + "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", + err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map); + BUG(); + } + + gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, + gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid); + + if (cch_start(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n", + gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map, + (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select); +} + +/* + * Update fields in 
an active CCH: + * - retarget interrupts on local blade + * - update sizeavail mask + */ +int gru_update_cch(struct gru_thread_state *gts) +{ + struct gru_context_configuration_handle *cch; + struct gru_state *gru = gts->ts_gru; + int i, ctxnum = gts->ts_ctxnum, ret = 0; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + lock_cch_handle(cch); + if (cch->state == CCHSTATE_ACTIVE) { + if (gru->gs_gts[gts->ts_ctxnum] != gts) + goto exit; + if (cch_interrupt(cch)) + BUG(); + for (i = 0; i < 8; i++) + cch->sizeavail[i] = gts->ts_sizeavail; + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gru_cpu_fault_map_id(); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch_start(cch)) + BUG(); + ret = 1; + } +exit: + unlock_cch_handle(cch); + return ret; +} + +/* + * Update CCH tlb interrupt select. Required when all the following is true: + * - task's GRU context is loaded into a GRU + * - task is using interrupt notification for TLB faults + * - task has migrated to a different cpu on the same blade where + * it was previously running. + */ +static int gru_retarget_intr(struct gru_thread_state *gts) +{ + if (gts->ts_tlb_int_select < 0 + || gts->ts_tlb_int_select == gru_cpu_fault_map_id()) + return 0; + + gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, + gru_cpu_fault_map_id()); + return gru_update_cch(gts); +} + +/* + * Check if a GRU context is allowed to use a specific chiplet. By default + * a context is assigned to any blade-local chiplet. However, users can + * override this. 
+ * Returns 1 if assignment allowed, 0 otherwise + */ +static int gru_check_chiplet_assignment(struct gru_state *gru, + struct gru_thread_state *gts) +{ + int blade_id; + int chiplet_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + + chiplet_id = gts->ts_user_chiplet_id; + return gru->gs_blade_id == blade_id && + (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id); +} + +/* + * Unload the gru context if it is not assigned to the correct blade or + * chiplet. Misassignment can occur if the process migrates to a different + * blade or if the user changes the selected blade/chiplet. + */ +void gru_check_context_placement(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + /* + * If the current task is the context owner, verify that the + * context is correctly placed. This test is skipped for non-owner + * references. Pthread apps use non-owner references to the CBRs. + */ + gru = gts->ts_gru; + if (!gru || gts->ts_tgid_owner != current->tgid) + return; + + if (!gru_check_chiplet_assignment(gru, gts)) { + STAT(check_context_unload); + gru_unload_context(gts, 1); + } else if (gru_retarget_intr(gts)) { + STAT(check_context_retarget_intr); + } +} + + +/* + * Insufficient GRU resources available on the local blade. Steal a context from + * a process. This is a hack until a _real_ resource scheduler is written.... + */ +#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0) +#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? 
\ + ((g)+1) : &(b)->bs_grus[0]) + +static int is_gts_stealable(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) + return down_write_trylock(&bs->bs_kgts_sema); + else + return mutex_trylock(>s->ts_ctxlock); +} + +static void gts_stolen(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) { + up_write(&bs->bs_kgts_sema); + STAT(steal_kernel_context); + } else { + mutex_unlock(>s->ts_ctxlock); + STAT(steal_user_context); + } +} + +void gru_steal_context(struct gru_thread_state *gts) +{ + struct gru_blade_state *blade; + struct gru_state *gru, *gru0; + struct gru_thread_state *ngts = NULL; + int ctxnum, ctxnum0, flag = 0, cbr, dsr; + int blade_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + cbr = gts->ts_cbr_au_count; + dsr = gts->ts_dsr_au_count; + + blade = gru_base[blade_id]; + spin_lock(&blade->bs_lock); + + ctxnum = next_ctxnum(blade->bs_lru_ctxnum); + gru = blade->bs_lru_gru; + if (ctxnum == 0) + gru = next_gru(blade, gru); + blade->bs_lru_gru = gru; + blade->bs_lru_ctxnum = ctxnum; + ctxnum0 = ctxnum; + gru0 = gru; + while (1) { + if (gru_check_chiplet_assignment(gru, gts)) { + if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) + break; + spin_lock(&gru->gs_lock); + for (; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (flag && gru == gru0 && ctxnum == ctxnum0) + break; + ngts = gru->gs_gts[ctxnum]; + /* + * We are grabbing locks out of order, so trylock is + * needed. GTSs are usually not locked, so the odds of + * success are high. If trylock fails, try to steal a + * different GSEG. 
+ */ + if (ngts && is_gts_stealable(ngts, blade)) + break; + ngts = NULL; + } + spin_unlock(&gru->gs_lock); + if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) + break; + } + if (flag && gru == gru0) + break; + flag = 1; + ctxnum = 0; + gru = next_gru(blade, gru); + } + spin_unlock(&blade->bs_lock); + + if (ngts) { + gts->ustats.context_stolen++; + ngts->ts_steal_jiffies = jiffies; + gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1); + gts_stolen(ngts, blade); + } else { + STAT(steal_context_failed); + } + gru_dbg(grudev, + "stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;" + " avail cb %ld, ds %ld\n", + gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map), + hweight64(gru->gs_dsr_map)); +} + +/* + * Assign a gru context. + */ +static int gru_assign_context_number(struct gru_state *gru) +{ + int ctxnum; + + ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); + __set_bit(ctxnum, &gru->gs_context_map); + return ctxnum; +} + +/* + * Scan the GRUs on the local blade & assign a GRU context. 
+ */ +struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru, *grux; + int i, max_active_contexts; + int blade_id = gts->ts_user_blade_id; + + if (blade_id < 0) + blade_id = uv_numa_blade_id(); +again: + gru = NULL; + max_active_contexts = GRU_NUM_CCH; + for_each_gru_on_blade(grux, blade_id, i) { + if (!gru_check_chiplet_assignment(grux, gts)) + continue; + if (check_gru_resources(grux, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, + max_active_contexts)) { + gru = grux; + max_active_contexts = grux->gs_active_contexts; + if (max_active_contexts == 0) + break; + } + } + + if (gru) { + spin_lock(&gru->gs_lock); + if (!check_gru_resources(gru, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, GRU_NUM_CCH)) { + spin_unlock(&gru->gs_lock); + goto again; + } + reserve_gru_resources(gru, gts); + gts->ts_gru = gru; + gts->ts_blade = gru->gs_blade_id; + gts->ts_ctxnum = gru_assign_context_number(gru); + atomic_inc(>s->ts_refcnt); + gru->gs_gts[gts->ts_ctxnum] = gts; + spin_unlock(&gru->gs_lock); + + STAT(assign_context); + gru_dbg(grudev, + "gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n", + gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts, + gts->ts_gru->gs_gid, gts->ts_ctxnum, + gts->ts_cbr_au_count, gts->ts_dsr_au_count); + } else { + gru_dbg(grudev, "failed to allocate a GTS %s\n", ""); + STAT(assign_context_failed); + } + + return gru; +} + +/* + * gru_nopage + * + * Map the user's GRU segment + * + * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries. 
+ */ +int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct gru_thread_state *gts; + unsigned long paddr, vaddr; + + vaddr = (unsigned long)vmf->virtual_address; + gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", + vma, vaddr, GSEG_BASE(vaddr)); + STAT(nopfn); + + /* The following check ensures vaddr is a valid address in the VMA */ + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (!gts) + return VM_FAULT_SIGBUS; + +again: + mutex_lock(>s->ts_ctxlock); + preempt_disable(); + + gru_check_context_placement(gts); + + if (!gts->ts_gru) { + STAT(load_user_context); + if (!gru_assign_gru_context(gts)) { + preempt_enable(); + mutex_unlock(>s->ts_ctxlock); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ + if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) + gru_steal_context(gts); + goto again; + } + gru_load_context(gts); + paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum); + remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1), + paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE, + vma->vm_page_prot); + } + + preempt_enable(); + mutex_unlock(>s->ts_ctxlock); + + return VM_FAULT_NOPAGE; +} + diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c new file mode 100644 index 00000000000..7768b87d995 --- /dev/null +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -0,0 +1,381 @@ +/* + * SN Platform GRU Driver + * + * PROC INTERFACES + * + * This file supports the /proc interfaces for the GRU driver + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/proc_fs.h> +#include <linux/device.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#define printstat(s, f) printstat_val(s, &gru_stats.f, #f) + +static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) +{ + unsigned long val = atomic_long_read(v); + + seq_printf(s, "%16lu %s\n", val, id); +} + +static int statistics_show(struct seq_file *s, void *p) +{ + printstat(s, vdata_alloc); + printstat(s, vdata_free); + printstat(s, gts_alloc); + printstat(s, gts_free); + printstat(s, gms_alloc); + printstat(s, gms_free); + printstat(s, gts_double_allocate); + printstat(s, assign_context); + printstat(s, assign_context_failed); + printstat(s, free_context); + printstat(s, load_user_context); + printstat(s, load_kernel_context); + printstat(s, lock_kernel_context); + printstat(s, unlock_kernel_context); + printstat(s, steal_user_context); + printstat(s, steal_kernel_context); + printstat(s, steal_context_failed); + printstat(s, nopfn); + printstat(s, asid_new); + printstat(s, asid_next); + printstat(s, asid_wrap); + printstat(s, asid_reuse); + printstat(s, intr); + printstat(s, intr_cbr); + printstat(s, intr_tfh); + printstat(s, intr_spurious); + printstat(s, intr_mm_lock_failed); + printstat(s, call_os); + printstat(s, call_os_wait_queue); + printstat(s, user_flush_tlb); + printstat(s, user_unload_context); + printstat(s, user_exception); + printstat(s, set_context_option); + printstat(s, 
check_context_retarget_intr); + printstat(s, check_context_unload); + printstat(s, tlb_dropin); + printstat(s, tlb_preload_page); + printstat(s, tlb_dropin_fail_no_asid); + printstat(s, tlb_dropin_fail_upm); + printstat(s, tlb_dropin_fail_invalid); + printstat(s, tlb_dropin_fail_range_active); + printstat(s, tlb_dropin_fail_idle); + printstat(s, tlb_dropin_fail_fmm); + printstat(s, tlb_dropin_fail_no_exception); + printstat(s, tfh_stale_on_fault); + printstat(s, mmu_invalidate_range); + printstat(s, mmu_invalidate_page); + printstat(s, flush_tlb); + printstat(s, flush_tlb_gru); + printstat(s, flush_tlb_gru_tgh); + printstat(s, flush_tlb_gru_zero_asid); + printstat(s, copy_gpa); + printstat(s, read_gpa); + printstat(s, mesq_receive); + printstat(s, mesq_receive_none); + printstat(s, mesq_send); + printstat(s, mesq_send_failed); + printstat(s, mesq_noop); + printstat(s, mesq_send_unexpected_error); + printstat(s, mesq_send_lb_overflow); + printstat(s, mesq_send_qlimit_reached); + printstat(s, mesq_send_amo_nacked); + printstat(s, mesq_send_put_nacked); + printstat(s, mesq_qf_locked); + printstat(s, mesq_qf_noop_not_full); + printstat(s, mesq_qf_switch_head_failed); + printstat(s, mesq_qf_unexpected_error); + printstat(s, mesq_noop_unexpected_error); + printstat(s, mesq_noop_lb_overflow); + printstat(s, mesq_noop_qlimit_reached); + printstat(s, mesq_noop_amo_nacked); + printstat(s, mesq_noop_put_nacked); + printstat(s, mesq_noop_page_overflow); + return 0; +} + +static ssize_t statistics_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + memset(&gru_stats, 0, sizeof(gru_stats)); + return count; +} + +static int mcs_statistics_show(struct seq_file *s, void *p) +{ + int op; + unsigned long total, count, max; + static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", + "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", + "tfh_write_restart", "tgh_invalidate"}; + + seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", 
"aver-clks", "max-clks"); + for (op = 0; op < mcsop_last; op++) { + count = atomic_long_read(&mcs_op_statistics[op].count); + total = atomic_long_read(&mcs_op_statistics[op].total); + max = mcs_op_statistics[op].max; + seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count, + count ? total / count : 0, max); + } + return 0; +} + +static ssize_t mcs_statistics_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *data) +{ + memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics)); + return count; +} + +static int options_show(struct seq_file *s, void *p) +{ + seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); + seq_printf(s, "0x%lx\n", gru_options); + return 0; +} + +static ssize_t options_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + char buf[20]; + + if (count >= sizeof(buf)) + return -EINVAL; + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + buf[count] = '\0'; + if (strict_strtoul(buf, 0, &gru_options)) + return -EINVAL; + + return count; +} + +static int cch_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data; + int i; + struct gru_state *gru = GID_TO_GRU(gid); + struct gru_thread_state *ts; + const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; + + if (gid == 0) + seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", + "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); + if (gru) + for (i = 0; i < GRU_NUM_CCH; i++) { + ts = gru->gs_gts[i]; + if (!ts) + continue; + seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", + gru->gs_gid, gru->gs_blade_id, i, + is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, + is_kernel_context(ts) ? 
0 : ts->ts_tgid_owner, + ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, + ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, + mode[ts->ts_user_options & + GRU_OPT_MISS_MASK]); + } + + return 0; +} + +static int gru_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data, ctxfree, cbrfree, dsrfree; + struct gru_state *gru = GID_TO_GRU(gid); + + if (gid == 0) { + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", + "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", + "busy", "busy", "free", "free", "free"); + } + if (gru) { + ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; + cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n", + gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree, + GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree, + ctxfree, cbrfree, dsrfree); + } + + return 0; +} + +static void seq_stop(struct seq_file *file, void *data) +{ +} + +static void *seq_start(struct seq_file *file, loff_t *gid) +{ + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static void *seq_next(struct seq_file *file, void *data, loff_t *gid) +{ + (*gid)++; + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static const struct seq_operations cch_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = cch_seq_show +}; + +static const struct seq_operations gru_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = gru_seq_show +}; + +static int statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, statistics_show, NULL); +} + +static int mcs_statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, mcs_statistics_show, NULL); +} + +static int options_open(struct inode *inode, struct file *file) +{ + return single_open(file, options_show, NULL); +} + +static 
int cch_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cch_seq_ops); +} + +static int gru_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &gru_seq_ops); +} + +/* *INDENT-OFF* */ +static const struct file_operations statistics_fops = { + .open = statistics_open, + .read = seq_read, + .write = statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations mcs_statistics_fops = { + .open = mcs_statistics_open, + .read = seq_read, + .write = mcs_statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations options_fops = { + .open = options_open, + .read = seq_read, + .write = options_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations cch_fops = { + .open = cch_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +static const struct file_operations gru_fops = { + .open = gru_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_entry { + char *name; + int mode; + const struct file_operations *fops; + struct proc_dir_entry *entry; +} proc_files[] = { + {"statistics", 0644, &statistics_fops}, + {"mcs_statistics", 0644, &mcs_statistics_fops}, + {"debug_options", 0644, &options_fops}, + {"cch_status", 0444, &cch_fops}, + {"gru_status", 0444, &gru_fops}, + {NULL} +}; +/* *INDENT-ON* */ + +static struct proc_dir_entry *proc_gru __read_mostly; + +static int create_proc_file(struct proc_entry *p) +{ + p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); + if (!p->entry) + return -1; + return 0; +} + +static void delete_proc_files(void) +{ + struct proc_entry *p; + + if (proc_gru) { + for (p = proc_files; p->name; p++) + if (p->entry) + remove_proc_entry(p->name, proc_gru); + remove_proc_entry("gru", proc_gru->parent); + } +} + +int gru_proc_init(void) +{ + struct proc_entry *p; + + proc_gru = 
proc_mkdir("sgi_uv/gru", NULL); + + for (p = proc_files; p->name; p++) + if (create_proc_file(p)) + goto err; + return 0; + +err: + delete_proc_files(); + return -1; +} + +void gru_proc_exit(void) +{ + delete_proc_files(); +} diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h new file mode 100644 index 00000000000..02a77b8b8ee --- /dev/null +++ b/drivers/misc/sgi-gru/grutables.h @@ -0,0 +1,685 @@ +/* + * SN Platform GRU Driver + * + * GRU DRIVER TABLES, MACROS, externs, etc + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUTABLES_H__ +#define __GRUTABLES_H__ + +/* + * GRU Chiplet: + * The GRU is a user addressible memory accelerator. It provides + * several forms of load, store, memset, bcopy instructions. In addition, it + * contains special instructions for AMOs, sending messages to message + * queues, etc. + * + * The GRU is an integral part of the node controller. It connects + * directly to the cpu socket. In its current implementation, there are 2 + * GRU chiplets in the node controller on each blade (~node). + * + * The entire GRU memory space is fully coherent and cacheable by the cpus. 
+ * + * Each GRU chiplet has a physical memory map that looks like the following: + * + * +-----------------+ + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * +-----------------+ + * | system control | + * +-----------------+ _______ +-------------+ + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / | instructions| + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / |-------------| + * |/////////////////| / | | + * +-----------------+ | | + * | context 15 | | data | + * +-----------------+ | | + * | ...... | \ | | + * +-----------------+ \____________ +-------------+ + * | context 1 | + * +-----------------+ + * | context 0 | + * +-----------------+ + * + * Each of the "contexts" is a chunk of memory that can be mmaped into user + * space. The context consists of 2 parts: + * + * - an instruction space that can be directly accessed by the user + * to issue GRU instructions and to check instruction status. + * + * - a data area that acts as normal RAM. + * + * User instructions contain virtual addresses of data to be accessed by the + * GRU. The GRU contains a TLB that is used to convert these user virtual + * addresses to physical addresses. + * + * The "system control" area of the GRU chiplet is used by the kernel driver + * to manage user contexts and to perform functions such as TLB dropin and + * purging. + * + * One context may be reserved for the kernel and used for cross-partition + * communication. The GRU will also be used to asynchronously zero out + * large blocks of memory (not currently implemented). + * + * + * Tables: + * + * VDATA-VMA Data - Holds a few parameters. Head of linked list of + * GTS tables for threads using the GSEG + * GTS - Gru Thread State - contains info for managing a GSEG context. 
A + * GTS is allocated for each thread accessing a + * GSEG. + * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is + * not loaded into a GRU + * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs + * where a GSEG has been loaded. Similar to + * an mm_struct but for GRU. + * + * GS - GRU State - Used to manage the state of a GRU chiplet + * BS - Blade State - Used to manage state of all GRU chiplets + * on a blade + * + * + * Normal task tables for task using GRU. + * - 2 threads in process + * - 2 GSEGs open in process + * - GSEG1 is being used by both threads + * - GSEG2 is used only by thread 2 + * + * task -->| + * task ---+---> mm ->------ (notifier) -------+-> gms + * | | + * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) + * | | | + * | +-> gts--->| GSEG1 (thread2) + * | | + * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) + * . + * . + * + * GSEGs are marked DONTCOPY on fork + * + * At open + * file.private_data -> NULL + * + * At mmap, + * vma -> vdata + * + * After gseg reference + * vma -> vdata ->gts + * + * After fork + * parent + * vma -> vdata -> gts + * child + * (vma is not copied) + * + */ + +#include <linux/rmap.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/wait.h> +#include <linux/mmu_notifier.h> +#include "gru.h" +#include "grulib.h" +#include "gruhandles.h" + +extern struct gru_stats_s gru_stats; +extern struct gru_blade_state *gru_base[]; +extern unsigned long gru_start_paddr, gru_end_paddr; +extern void *gru_start_vaddr; +extern unsigned int gru_max_gids; + +#define GRU_MAX_BLADES MAX_NUMNODES +#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) + +#define GRU_DRIVER_ID_STR "SGI GRU Device Driver" +#define GRU_DRIVER_VERSION_STR "0.85" + +/* + * GRU statistics. 
+ */ +struct gru_stats_s { + atomic_long_t vdata_alloc; + atomic_long_t vdata_free; + atomic_long_t gts_alloc; + atomic_long_t gts_free; + atomic_long_t gms_alloc; + atomic_long_t gms_free; + atomic_long_t gts_double_allocate; + atomic_long_t assign_context; + atomic_long_t assign_context_failed; + atomic_long_t free_context; + atomic_long_t load_user_context; + atomic_long_t load_kernel_context; + atomic_long_t lock_kernel_context; + atomic_long_t unlock_kernel_context; + atomic_long_t steal_user_context; + atomic_long_t steal_kernel_context; + atomic_long_t steal_context_failed; + atomic_long_t nopfn; + atomic_long_t asid_new; + atomic_long_t asid_next; + atomic_long_t asid_wrap; + atomic_long_t asid_reuse; + atomic_long_t intr; + atomic_long_t intr_cbr; + atomic_long_t intr_tfh; + atomic_long_t intr_spurious; + atomic_long_t intr_mm_lock_failed; + atomic_long_t call_os; + atomic_long_t call_os_wait_queue; + atomic_long_t user_flush_tlb; + atomic_long_t user_unload_context; + atomic_long_t user_exception; + atomic_long_t set_context_option; + atomic_long_t check_context_retarget_intr; + atomic_long_t check_context_unload; + atomic_long_t tlb_dropin; + atomic_long_t tlb_preload_page; + atomic_long_t tlb_dropin_fail_no_asid; + atomic_long_t tlb_dropin_fail_upm; + atomic_long_t tlb_dropin_fail_invalid; + atomic_long_t tlb_dropin_fail_range_active; + atomic_long_t tlb_dropin_fail_idle; + atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t tlb_dropin_fail_no_exception; + atomic_long_t tfh_stale_on_fault; + atomic_long_t mmu_invalidate_range; + atomic_long_t mmu_invalidate_page; + atomic_long_t flush_tlb; + atomic_long_t flush_tlb_gru; + atomic_long_t flush_tlb_gru_tgh; + atomic_long_t flush_tlb_gru_zero_asid; + + atomic_long_t copy_gpa; + atomic_long_t read_gpa; + + atomic_long_t mesq_receive; + atomic_long_t mesq_receive_none; + atomic_long_t mesq_send; + atomic_long_t mesq_send_failed; + atomic_long_t mesq_noop; + atomic_long_t mesq_send_unexpected_error; + 
atomic_long_t mesq_send_lb_overflow; + atomic_long_t mesq_send_qlimit_reached; + atomic_long_t mesq_send_amo_nacked; + atomic_long_t mesq_send_put_nacked; + atomic_long_t mesq_page_overflow; + atomic_long_t mesq_qf_locked; + atomic_long_t mesq_qf_noop_not_full; + atomic_long_t mesq_qf_switch_head_failed; + atomic_long_t mesq_qf_unexpected_error; + atomic_long_t mesq_noop_unexpected_error; + atomic_long_t mesq_noop_lb_overflow; + atomic_long_t mesq_noop_qlimit_reached; + atomic_long_t mesq_noop_amo_nacked; + atomic_long_t mesq_noop_put_nacked; + atomic_long_t mesq_noop_page_overflow; + +}; + +enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, + cchop_deallocate, tfhop_write_only, tfhop_write_restart, + tghop_invalidate, mcsop_last}; + +struct mcs_op_statistic { + atomic_long_t count; + atomic_long_t total; + unsigned long max; +}; + +extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +#define OPT_DPRINT 1 +#define OPT_STATS 2 + + +#define IRQ_GRU 110 /* Starting IRQ number for interrupts */ + +/* Delay in jiffies between attempts to assign a GRU context */ +#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) + +/* + * If a process has it's context stolen, min delay in jiffies before trying to + * steal a context from another process. + */ +#define GRU_STEAL_DELAY ((HZ * 200) / 1000) + +#define STAT(id) do { \ + if (gru_options & OPT_STATS) \ + atomic_long_inc(&gru_stats.id); \ + } while (0) + +#ifdef CONFIG_SGI_GRU_DEBUG +#define gru_dbg(dev, fmt, x...) \ + do { \ + if (gru_options & OPT_DPRINT) \ + printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ + } while (0) +#else +#define gru_dbg(x...) +#endif + +/*----------------------------------------------------------------------------- + * ASID management + */ +#define MAX_ASID 0xfffff0 +#define MIN_ASID 8 +#define ASID_INC 8 /* number of regions */ + +/* Generate a GRU asid value from a GRU base asid & a virtual address. 
*/ +#define VADDR_HI_BIT 64 +#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) +#define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) + +/*------------------------------------------------------------------------------ + * File & VMS Tables + */ + +struct gru_state; + +/* + * This structure is pointed to from the mmstruct via the notifier pointer. + * There is one of these per address space. + */ +struct gru_mm_tracker { /* pack to reduce size */ + unsigned int mt_asid_gen:24; /* ASID wrap count */ + unsigned int mt_asid:24; /* current base ASID for gru */ + unsigned short mt_ctxbitmap:16;/* bitmap of contexts using + asid */ +} __attribute__ ((packed)); + +struct gru_mm_struct { + struct mmu_notifier ms_notifier; + atomic_t ms_refcnt; + spinlock_t ms_asid_lock; /* protects ASID assignment */ + atomic_t ms_range_active;/* num range_invals active */ + char ms_released; + wait_queue_head_t ms_wait_queue; + DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); + struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; +}; + +/* + * One of these structures is allocated when a GSEG is mmaped. The + * structure is pointed to by the vma->vm_private_data field in the vma struct. + */ +struct gru_vma_data { + spinlock_t vd_lock; /* Serialize access to vma */ + struct list_head vd_head; /* head of linked list of gts */ + long vd_user_options;/* misc user option flags */ + int vd_cbr_au_count; + int vd_dsr_au_count; + unsigned char vd_tlb_preload_count; +}; + +/* + * One of these is allocated for each thread accessing a mmaped GRU. A linked + * list of these structure is hung off the struct gru_vma_data in the mm_struct. 
+ */ +struct gru_thread_state { + struct list_head ts_next; /* list - head at vma-private */ + struct mutex ts_ctxlock; /* load/unload CTX lock */ + struct mm_struct *ts_mm; /* mm currently mapped to + context */ + struct vm_area_struct *ts_vma; /* vma of GRU context */ + struct gru_state *ts_gru; /* GRU where the context is + loaded */ + struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ + unsigned char ts_tlb_preload_count; /* TLB preload pages */ + unsigned long ts_cbr_map; /* map of allocated CBRs */ + unsigned long ts_dsr_map; /* map of allocated DATA + resources */ + unsigned long ts_steal_jiffies;/* jiffies when context last + stolen */ + long ts_user_options;/* misc user option flags */ + pid_t ts_tgid_owner; /* task that is using the + context - for migration */ + short ts_user_blade_id;/* user selected blade */ + char ts_user_chiplet_id;/* user selected chiplet */ + unsigned short ts_sizeavail; /* Pagesizes in use */ + int ts_tsid; /* thread that owns the + structure */ + int ts_tlb_int_select;/* target cpu if interrupts + enabled */ + int ts_ctxnum; /* context number where the + context is loaded */ + atomic_t ts_refcnt; /* reference count GTS */ + unsigned char ts_dsr_au_count;/* Number of DSR resources + required for contest */ + unsigned char ts_cbr_au_count;/* Number of CBR resources + required for contest */ + char ts_cch_req_slice;/* CCH packet slice */ + char ts_blade; /* If >= 0, migrate context if + ref from diferent blade */ + char ts_force_cch_reload; + char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each + allocated CB */ + int ts_data_valid; /* Indicates if ts_gdata has + valid data */ + struct gru_gseg_statistics ustats; /* User statistics */ + unsigned long ts_gdata[0]; /* save area for GRU data (CB, + DS, CBE) */ +}; + +/* + * Threaded programs actually allocate an array of GSEGs when a context is + * created. Each thread uses a separate GSEG. TSID is the index into the GSEG + * array. 
+ */ +#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) +#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ + (gts)->ts_tsid * GRU_GSEG_PAGESIZE) + +#define NULLCTX (-1) /* if context not loaded into GRU */ + +/*----------------------------------------------------------------------------- + * GRU State Tables + */ + +/* + * One of these exists for each GRU chiplet. + */ +struct gru_state { + struct gru_blade_state *gs_blade; /* GRU state for entire + blade */ + unsigned long gs_gru_base_paddr; /* Physical address of + gru segments (64) */ + void *gs_gru_base_vaddr; /* Virtual address of + gru segments (64) */ + unsigned short gs_gid; /* unique GRU number */ + unsigned short gs_blade_id; /* blade of GRU */ + unsigned char gs_chiplet_id; /* blade chiplet of GRU */ + unsigned char gs_tgh_local_shift; /* used to pick TGH for + local flush */ + unsigned char gs_tgh_first_remote; /* starting TGH# for + remote flush */ + spinlock_t gs_asid_lock; /* lock used for + assigning asids */ + spinlock_t gs_lock; /* lock used for + assigning contexts */ + + /* -- the following are protected by the gs_asid_lock spinlock ---- */ + unsigned int gs_asid; /* Next availe ASID */ + unsigned int gs_asid_limit; /* Limit of available + ASIDs */ + unsigned int gs_asid_gen; /* asid generation. 
+ Inc on wrap */ + + /* --- the following fields are protected by the gs_lock spinlock --- */ + unsigned long gs_context_map; /* bitmap to manage + contexts in use */ + unsigned long gs_cbr_map; /* bitmap to manage CB + resources */ + unsigned long gs_dsr_map; /* bitmap used to manage + DATA resources */ + unsigned int gs_reserved_cbrs; /* Number of kernel- + reserved cbrs */ + unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- + reserved dsrs */ + unsigned short gs_active_contexts; /* number of contexts + in use */ + struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using + the context */ + int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ +}; + +/* + * This structure contains the GRU state for all the GRUs on a blade. + */ +struct gru_blade_state { + void *kernel_cb; /* First kernel + reserved cb */ + void *kernel_dsr; /* First kernel + reserved DSR */ + struct rw_semaphore bs_kgts_sema; /* lock for kgts */ + struct gru_thread_state *bs_kgts; /* GTS for kernel use */ + + /* ---- the following are used for managing kernel async GRU CBRs --- */ + int bs_async_dsr_bytes; /* DSRs for async */ + int bs_async_cbrs; /* CBRs AU for async */ + struct completion *bs_async_wq; + + /* ---- the following are protected by the bs_lock spinlock ---- */ + spinlock_t bs_lock; /* lock used for + stealing contexts */ + int bs_lru_ctxnum; /* STEAL - last context + stolen */ + struct gru_state *bs_lru_gru; /* STEAL - last gru + stolen */ + + struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; +}; + +/*----------------------------------------------------------------------------- + * Address Primitives + */ +#define get_tfm_for_cpu(g, c) \ + ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) +#define get_tfh_by_index(g, i) \ + ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) +#define get_tgh_by_index(g, i) \ + ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) +#define get_cbe_by_index(g, i) \ + ((struct 
gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\ + (i))) + +/*----------------------------------------------------------------------------- + * Useful Macros + */ + +/* Given a blade# & chiplet#, get a pointer to the GRU */ +#define get_gru(b, c) (&gru_base[b]->bs_grus[c]) + +/* Number of bytes to save/restore when unloading/loading GRU contexts */ +#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) +#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) + +/* Convert a user CB number to the actual CBRNUM */ +#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ + * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) + +/* Convert a gid to a pointer to the GRU */ +#define GID_TO_GRU(gid) \ + (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \ + (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ + bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \ + NULL) + +/* Scan all active GRUs in a GRU bitmap */ +#define for_each_gru_in_bitmap(gid, map) \ + for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\ + (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid))) + +/* Scan all active GRUs on a specific blade */ +#define for_each_gru_on_blade(gru, nid, i) \ + for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \ + (i) < GRU_CHIPLETS_PER_BLADE; \ + (i)++, (gru)++) + +/* Scan all GRUs */ +#define foreach_gid(gid) \ + for ((gid) = 0; (gid) < gru_max_gids; (gid)++) + +/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */ +#define for_each_gts_on_gru(gts, gru, ctxnum) \ + for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ + if (((gts) = (gru)->gs_gts[ctxnum])) + +/* Scan each CBR whose bit is set in a TFM (or copy of) */ +#define for_each_cbr_in_tfm(i, map) \ + for ((i) = find_first_bit(map, GRU_NUM_CBE); \ + (i) < GRU_NUM_CBE; \ + (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i)) + +/* Scan each CBR in a CBR bitmap. 
Note: multiple CBRs in an allocation unit */ +#define for_each_cbr_in_allocation_map(i, map, k) \ + for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \ + (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \ + for ((i) = (k)*GRU_CBR_AU_SIZE; \ + (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) + +/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ +#define for_each_dsr_in_allocation_map(i, map, k) \ + for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\ + (k) < GRU_DSR_AU; \ + (k) = find_next_bit((const unsigned long *)map, \ + GRU_DSR_AU, (k) + 1)) \ + for ((i) = (k) * GRU_DSR_AU_CL; \ + (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) + +#define gseg_physical_address(gru, ctxnum) \ + ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) +#define gseg_virtual_address(gru, ctxnum) \ + ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) + +/*----------------------------------------------------------------------------- + * Lock / Unlock GRU handles + * Use the "delresp" bit in the handle as a "lock" bit. 
+ */ + +/* Lock hierarchy checking enabled only in emulator */ + +/* 0 = lock failed, 1 = locked */ +static inline int __trylock_handle(void *h) +{ + return !test_and_set_bit(1, h); +} + +static inline void __lock_handle(void *h) +{ + while (test_and_set_bit(1, h)) + cpu_relax(); +} + +static inline void __unlock_handle(void *h) +{ + clear_bit(1, h); +} + +static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) +{ + return __trylock_handle(cch); +} + +static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) +{ + __lock_handle(cch); +} + +static inline void unlock_cch_handle(struct gru_context_configuration_handle + *cch) +{ + __unlock_handle(cch); +} + +static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __lock_handle(tgh); +} + +static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __unlock_handle(tgh); +} + +static inline int is_kernel_context(struct gru_thread_state *gts) +{ + return !gts->ts_mm; +} + +/* + * The following are for Nehelem-EX. A more general scheme is needed for + * future processors. 
+ */ +#define UV_MAX_INT_CORES 8 +#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) +#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) +#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ + ((cpu_physical_id(p) >> 1) & 3)) +/*----------------------------------------------------------------------------- + * Function prototypes & externs + */ +struct gru_unload_context_req; + +extern const struct vm_operations_struct gru_vm_ops; +extern struct device *grudev; + +extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, + int tsid); +extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); +extern void gru_load_context(struct gru_thread_state *gts); +extern void gru_steal_context(struct gru_thread_state *gts); +extern void gru_unload_context(struct gru_thread_state *gts, int savestate); +extern int gru_update_cch(struct gru_thread_state *gts); +extern void gts_drop(struct gru_thread_state *gts); +extern void gru_tgh_flush_init(struct gru_state *gru); +extern int gru_kservices_init(void); +extern void gru_kservices_exit(void); +extern irqreturn_t gru0_intr(int irq, void *dev_id); +extern irqreturn_t gru1_intr(int irq, void *dev_id); +extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); +extern int gru_dump_chiplet_request(unsigned long arg); +extern long gru_get_gseg_statistics(unsigned long arg); +extern int gru_handle_user_call_os(unsigned long address); +extern int gru_user_flush_tlb(unsigned long arg); +extern int gru_user_unload_context(unsigned long arg); +extern int gru_get_exception_detail(unsigned long arg); +extern int gru_set_context_option(unsigned long address); +extern void gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_cpu_fault_map_id(void); +extern struct 
vm_area_struct *gru_find_vma(unsigned long vaddr); +extern void gru_flush_all_tlb(struct gru_state *gru); +extern int gru_proc_init(void); +extern void gru_proc_exit(void); + +extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid); +extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, + int cbr_au_count, char *cbmap); +extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, + int dsr_au_count, char *dsmap); +extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); +extern struct gru_mm_struct *gru_register_mmu_notifier(void); +extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); + +extern int gru_ktest(unsigned long arg); +extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len); + +extern unsigned long gru_options; + +#endif /* __GRUTABLES_H__ */ diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c new file mode 100644 index 00000000000..240a6d36166 --- /dev/null +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -0,0 +1,378 @@ +/* + * SN Platform GRU Driver + * + * MMUOPS callbacks + TLB flushing + * + * This file handles emu notifier callbacks from the core kernel. The callbacks + * are used to update the TLB in the GRU as a result of changes in the + * state of a process address space. This file also handles TLB invalidates + * from the GRU driver. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/hugetlb.h> +#include <linux/delay.h> +#include <linux/timex.h> +#include <linux/srcu.h> +#include <asm/processor.h> +#include "gru.h" +#include "grutables.h" +#include <asm/uv/uv_hub.h> + +#define gru_random() get_cycles() + +/* ---------------------------------- TLB Invalidation functions -------- + * get_tgh_handle + * + * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the + * local blade, use a fixed TGH that is a function of the blade-local cpu + * number. Normally, this TGH is private to the cpu & no contention occurs for + * the TGH. For offblade GRUs, select a random TGH in the range above the + * private TGHs. A spinlock is required to access this TGH & the lock must be + * released when the invalidate is completes. This sucks, but it is the best we + * can do. + * + * Note that the spinlock is IN the TGH handle so locking does not involve + * additional cache lines. 
+ * + */ +static inline int get_off_blade_tgh(struct gru_state *gru) +{ + int n; + + n = GRU_NUM_TGH - gru->gs_tgh_first_remote; + n = gru_random() % n; + n += gru->gs_tgh_first_remote; + return n; +} + +static inline int get_on_blade_tgh(struct gru_state *gru) +{ + return uv_blade_processor_id() >> gru->gs_tgh_local_shift; +} + +static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state + *gru) +{ + struct gru_tlb_global_handle *tgh; + int n; + + preempt_disable(); + if (uv_numa_blade_id() == gru->gs_blade_id) + n = get_on_blade_tgh(gru); + else + n = get_off_blade_tgh(gru); + tgh = get_tgh_by_index(gru, n); + lock_tgh_handle(tgh); + + return tgh; +} + +static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + unlock_tgh_handle(tgh); + preempt_enable(); +} + +/* + * gru_flush_tlb_range + * + * General purpose TLB invalidation function. This function scans every GRU in + * the ENTIRE system (partition) looking for GRUs where the specified MM has + * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR + * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned + * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the + * cost of (possibly) a large number of future TLBmisses. + * + * The current algorithm is optimized based on the following (somewhat true) + * assumptions: + * - GRU contexts are not loaded into a GRU unless a reference is made to + * the data segment or control block (this is true, not an assumption). + * If a DS/CB is referenced, the user will also issue instructions that + * cause TLBmisses. It is not necessary to optimize for the case where + * contexts are loaded but no instructions cause TLB misses. (I know + * this will happen but I'm not optimizing for it). + * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally + * a few usec but in unusual cases, it could be longer. Avoid if + * possible. 
+ * - intrablade process migration between cpus is not frequent but is + * common. + * - a GRU context is not typically migrated to a different GRU on the + * blade because of intrablade migration + * - interblade migration is rare. Processes migrate their GRU context to + * the new blade. + * - if interblade migration occurs, migration back to the original blade + * is very very rare (ie., no optimization for this case) + * - most GRU instruction operate on a subset of the user REGIONS. Code + * & shared library regions are not likely targets of GRU instructions. + * + * To help improve the efficiency of TLB invalidation, the GMS data + * structure is maintained for EACH address space (MM struct). The GMS is + * also the structure that contains the pointer to the mmu callout + * functions. This structure is linked to the mm_struct for the address space + * using the mmu "register" function. The mmu interfaces are used to + * provide the callbacks for TLB invalidation. The GMS contains: + * + * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is + * loaded into the GRU. + * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in + * the above array + * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active + * in the GRU for the address space. This bitmap must be passed to the + * GRU to do an invalidate. + * + * The current algorithm for invalidating TLBs is: + * - scan the asidmap for GRUs where the context has been loaded, ie, + * asid is non-zero. + * - for each gru found: + * - if the ctxtmap is non-zero, there are active contexts in the + * GRU. TLB invalidate instructions must be issued to the GRU. + * - if the ctxtmap is zero, no context is active. Set the ASID to + * zero to force a full TLB invalidation. 
This is fast but will + * cause a lot of TLB misses if the context is reloaded onto the + * GRU + * + */ + +void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len) +{ + struct gru_state *gru; + struct gru_mm_tracker *asids; + struct gru_tlb_global_handle *tgh; + unsigned long num; + int grupagesize, pagesize, pageshift, gid, asid; + + /* ZZZ TODO - handle huge pages */ + pageshift = PAGE_SHIFT; + pagesize = (1UL << pageshift); + grupagesize = GRU_PAGESIZE(pageshift); + num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL); + + STAT(flush_tlb); + gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms, + start, len, gms->ms_asidmap[0]); + + spin_lock(&gms->ms_asid_lock); + for_each_gru_in_bitmap(gid, gms->ms_asidmap) { + STAT(flush_tlb_gru); + gru = GID_TO_GRU(gid); + asids = gms->ms_asids + gid; + asid = asids->mt_asid; + if (asids->mt_ctxbitmap && asid) { + STAT(flush_tlb_gru_tgh); + asid = GRUASID(asid, start); + gru_dbg(grudev, + " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, + num - 1, asids->mt_ctxbitmap); + get_unlock_tgh_handle(tgh); + } else { + STAT(flush_tlb_gru_zero_asid); + asids->mt_asid = 0; + __clear_bit(gru->gs_gid, gms->ms_asidmap); + gru_dbg(grudev, + " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n", + gid, asid, asids->mt_ctxbitmap, + gms->ms_asidmap[0]); + } + } + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Flush the entire TLB on a chiplet. 
+ */ +void gru_flush_all_tlb(struct gru_state *gru) +{ + struct gru_tlb_global_handle *tgh; + + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff); + get_unlock_tgh_handle(tgh); +} + +/* + * MMUOPS notifier callout functions + */ +static void gru_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_range); + atomic_inc(&gms->ms_range_active); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, + start, end, atomic_read(&gms->ms_range_active)); + gru_flush_tlb_range(gms, start, end - start); +} + +static void gru_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + /* ..._and_test() provides needed barrier */ + (void)atomic_dec_and_test(&gms->ms_range_active); + + wake_up_all(&gms->ms_wait_queue); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); +} + +static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_page); + gru_flush_tlb_range(gms, address, PAGE_SIZE); + gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address); +} + +static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + gms->ms_released = 1; + gru_dbg(grudev, "gms %p\n", gms); +} + + +static const struct mmu_notifier_ops gru_mmuops = { + .invalidate_page = gru_invalidate_page, + .invalidate_range_start = gru_invalidate_range_start, + .invalidate_range_end = gru_invalidate_range_end, + .release = 
gru_release, +}; + +/* Move this to the basic mmu_notifier file. But for now... */ +static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, + const struct mmu_notifier_ops *ops) +{ + struct mmu_notifier *mn, *gru_mn = NULL; + struct hlist_node *n; + + if (mm->mmu_notifier_mm) { + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, + hlist) + if (mn->ops == ops) { + gru_mn = mn; + break; + } + rcu_read_unlock(); + } + return gru_mn; +} + +struct gru_mm_struct *gru_register_mmu_notifier(void) +{ + struct gru_mm_struct *gms; + struct mmu_notifier *mn; + int err; + + mn = mmu_find_ops(current->mm, &gru_mmuops); + if (mn) { + gms = container_of(mn, struct gru_mm_struct, ms_notifier); + atomic_inc(&gms->ms_refcnt); + } else { + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (gms) { + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + gms->ms_notifier.ops = &gru_mmuops; + atomic_set(&gms->ms_refcnt, 1); + init_waitqueue_head(&gms->ms_wait_queue); + err = __mmu_notifier_register(&gms->ms_notifier, current->mm); + if (err) + goto error; + } + } + gru_dbg(grudev, "gms %p, refcnt %d\n", gms, + atomic_read(&gms->ms_refcnt)); + return gms; +error: + kfree(gms); + return ERR_PTR(err); +} + +void gru_drop_mmu_notifier(struct gru_mm_struct *gms) +{ + gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, + atomic_read(&gms->ms_refcnt), gms->ms_released); + if (atomic_dec_return(&gms->ms_refcnt) == 0) { + if (!gms->ms_released) + mmu_notifier_unregister(&gms->ms_notifier, current->mm); + kfree(gms); + STAT(gms_free); + } +} + +/* + * Setup TGH parameters. There are: + * - 24 TGH handles per GRU chiplet + * - a portion (MAX_LOCAL_TGH) of the handles are reserved for + * use by blade-local cpus + * - the rest are used by off-blade cpus. This usage is + * less frequent than blade-local usage. + * + * For now, use 16 handles for local flushes, 8 for remote flushes. 
If the blade + * has less than or equal to 16 cpus, each cpu has a unique handle that it can + * use. + */ +#define MAX_LOCAL_TGH 16 + +void gru_tgh_flush_init(struct gru_state *gru) +{ + int cpus, shift = 0, n; + + cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id); + + /* n = cpus rounded up to next power of 2 */ + if (cpus) { + n = 1 << fls(cpus - 1); + + /* + * shift count for converting local cpu# to TGH index + * 0 if cpus <= MAX_LOCAL_TGH, + * 1 if cpus <= 2*MAX_LOCAL_TGH, + * etc + */ + shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1)); + } + gru->gs_tgh_local_shift = shift; + + /* first starting TGH index to use for remote purges */ + gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift; + +} diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile new file mode 100644 index 00000000000..4fc40d8e1bc --- /dev/null +++ b/drivers/misc/sgi-xp/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for SGI's XP devices. +# + +obj-$(CONFIG_SGI_XP) += xp.o +xp-y := xp_main.o +xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o +xp-$(CONFIG_X86_64) += xp_uv.o + +obj-$(CONFIG_SGI_XP) += xpc.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o +xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o +xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o +xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o +xpc-$(CONFIG_X86_64) += xpc_uv.o + +obj-$(CONFIG_SGI_XP) += xpnet.o diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h new file mode 100644 index 00000000000..851b2f25ce0 --- /dev/null +++ b/drivers/misc/sgi-xp/xp.h @@ -0,0 +1,359 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * External Cross Partition (XP) structures and defines.
+ */ + +#ifndef _DRIVERS_MISC_SGIXP_XP_H +#define _DRIVERS_MISC_SGIXP_XP_H + +#include <linux/mutex.h> + +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV +#include <asm/uv/uv.h> +#define is_uv() is_uv_system() +#endif + +#ifndef is_uv +#define is_uv() 0 +#endif + +#if defined CONFIG_IA64 +#include <asm/system.h> +#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ +#define is_shub() ia64_platform_is("sn2") +#endif + +#ifndef is_shub1 +#define is_shub1() 0 +#endif + +#ifndef is_shub2 +#define is_shub2() 0 +#endif + +#ifndef is_shub +#define is_shub() 0 +#endif + +#ifdef USE_DBUG_ON +#define DBUG_ON(condition) BUG_ON(condition) +#else +#define DBUG_ON(condition) +#endif + +/* + * Define the maximum number of partitions the system can possibly support. + * It is based on the maximum number of hardware partitionable regions. The + * term 'region' in this context refers to the minimum number of nodes that + * can comprise an access protection grouping. The access protection is in + * regards to memory, IPI and IOI. + * + * The maximum number of hardware partitionable regions is equal to the + * maximum number of nodes in the entire system divided by the minimum number + * of nodes that comprise an access protection grouping. + */ +#define XP_MAX_NPARTITIONS_SN2 64 +#define XP_MAX_NPARTITIONS_UV 256 + +/* + * XPC establishes channel connections between the local partition and any + * other partition that is currently up. Over these channels, kernel-level + * `users' can communicate with their counterparts on the other partitions. + * + * If the need for additional channels arises, one can simply increase + * XPC_MAX_NCHANNELS accordingly. If the day should come where that number + * exceeds the absolute MAXIMUM number of channels possible (eight), then one + * will need to make changes to the XPC code to accommodate for this. + * + * The absolute maximum number of channels possible is limited to eight for + * performance reasons on sn2 hardware. 
The internal cross partition structures + * require sixteen bytes per channel, and eight allows all of this + * interface-shared info to fit in one 128-byte cacheline. + */ +#define XPC_MEM_CHANNEL 0 /* memory channel number */ +#define XPC_NET_CHANNEL 1 /* network channel number */ + +#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */ + +#if XPC_MAX_NCHANNELS > 8 +#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible. +#endif + +/* + * The macro XPC_MSG_SIZE() is provided for the user + * who wants to fit as many msg entries as possible in a given memory size + * (e.g. a memory page). + */ +#define XPC_MSG_MAX_SIZE 128 +#define XPC_MSG_HDR_MAX_SIZE 16 +#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE) + +#define XPC_MSG_SIZE(_payload_size) \ + ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \ + is_uv() ? 64 : 128) + + +/* + * Define the return values and values passed to user's callout functions. + * (It is important to add new value codes at the end just preceding + * xpUnknownReason, which must have the highest numerical value.)
+ */ +enum xp_retval { + xpSuccess = 0, + + xpNotConnected, /* 1: channel is not connected */ + xpConnected, /* 2: channel connected (opened) */ + xpRETIRED1, /* 3: (formerly xpDisconnected) */ + + xpMsgReceived, /* 4: message received */ + xpMsgDelivered, /* 5: message delivered and acknowledged */ + + xpRETIRED2, /* 6: (formerly xpTransferFailed) */ + + xpNoWait, /* 7: operation would require wait */ + xpRetry, /* 8: retry operation */ + xpTimeout, /* 9: timeout in xpc_allocate_msg_wait() */ + xpInterrupted, /* 10: interrupted wait */ + + xpUnequalMsgSizes, /* 11: message size disparity between sides */ + xpInvalidAddress, /* 12: invalid address */ + + xpNoMemory, /* 13: no memory available for XPC structures */ + xpLackOfResources, /* 14: insufficient resources for operation */ + xpUnregistered, /* 15: channel is not registered */ + xpAlreadyRegistered, /* 16: channel is already registered */ + + xpPartitionDown, /* 17: remote partition is down */ + xpNotLoaded, /* 18: XPC module is not loaded */ + xpUnloading, /* 19: this side is unloading XPC module */ + + xpBadMagic, /* 20: XPC MAGIC string not found */ + + xpReactivating, /* 21: remote partition was reactivated */ + + xpUnregistering, /* 22: this side is unregistering channel */ + xpOtherUnregistering, /* 23: other side is unregistering channel */ + + xpCloneKThread, /* 24: cloning kernel thread */ + xpCloneKThreadFailed, /* 25: cloning kernel thread failed */ + + xpNoHeartbeat, /* 26: remote partition has no heartbeat */ + + xpPioReadError, /* 27: PIO read error */ + xpPhysAddrRegFailed, /* 28: registration of phys addr range failed */ + + xpRETIRED3, /* 29: (formerly xpBteDirectoryError) */ + xpRETIRED4, /* 30: (formerly xpBtePoisonError) */ + xpRETIRED5, /* 31: (formerly xpBteWriteError) */ + xpRETIRED6, /* 32: (formerly xpBteAccessError) */ + xpRETIRED7, /* 33: (formerly xpBtePWriteError) */ + xpRETIRED8, /* 34: (formerly xpBtePReadError) */ + xpRETIRED9, /* 35: (formerly xpBteTimeOutError) */ + 
xpRETIRED10, /* 36: (formerly xpBteXtalkError) */ + xpRETIRED11, /* 37: (formerly xpBteNotAvailable) */ + xpRETIRED12, /* 38: (formerly xpBteUnmappedError) */ + + xpBadVersion, /* 39: bad version number */ + xpVarsNotSet, /* 40: the XPC variables are not set up */ + xpNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */ + xpInvalidPartid, /* 42: invalid partition ID */ + xpLocalPartid, /* 43: local partition ID */ + + xpOtherGoingDown, /* 44: other side going down, reason unknown */ + xpSystemGoingDown, /* 45: system is going down, reason unknown */ + xpSystemHalt, /* 46: system is being halted */ + xpSystemReboot, /* 47: system is being rebooted */ + xpSystemPoweroff, /* 48: system is being powered off */ + + xpDisconnecting, /* 49: channel disconnecting (closing) */ + + xpOpenCloseError, /* 50: channel open/close protocol error */ + + xpDisconnected, /* 51: channel disconnected (closed) */ + + xpBteCopyError, /* 52: bte_copy() returned error */ + xpSalError, /* 53: sn SAL error */ + xpRsvdPageNotSet, /* 54: the reserved page is not set up */ + xpPayloadTooBig, /* 55: payload too large for message slot */ + + xpUnsupported, /* 56: unsupported functionality or resource */ + xpNeedMoreInfo, /* 57: more info is needed by SAL */ + + xpGruCopyError, /* 58: gru_copy_gru() returned error */ + xpGruSendMqError, /* 59: gru send message queue related error */ + + xpBadChannelNumber, /* 60: invalid channel number */ + xpBadMsgType, /* 61: invalid message type */ + xpBiosError, /* 62: BIOS error */ + + xpUnknownReason /* 63: unknown reason - must be last in enum */ +}; + +/* + * Define the callout function type used by XPC to update the user on + * connection activity and state changes via the user function registered + * by xpc_connect(). + * + * Arguments: + * + * reason - reason code. + * partid - partition ID associated with condition. + * ch_number - channel # associated with condition. + * data - pointer to optional data. 
+ * key - pointer to optional user-defined value provided as the "key" + * argument to xpc_connect(). + * + * A reason code of xpConnected indicates that a connection has been + * established to the specified partition on the specified channel. The data + * argument indicates the max number of entries allowed in the message queue. + * + * A reason code of xpMsgReceived indicates that an XPC message arrived from + * the specified partition on the specified channel. The data argument + * specifies the address of the message's payload. The user must call + * xpc_received() when finished with the payload. + * + * All other reason codes indicate failure. The data argument is NULL. + * When a failure reason code is received, one can assume that the channel + * is not connected. + */ +typedef void (*xpc_channel_func) (enum xp_retval reason, short partid, + int ch_number, void *data, void *key); + +/* + * Define the callout function type used by XPC to notify the user of + * messages received and delivered via the user function registered by + * xpc_send_notify(). + * + * Arguments: + * + * reason - reason code. + * partid - partition ID associated with condition. + * ch_number - channel # associated with condition. + * key - pointer to optional user-defined value provided as the "key" + * argument to xpc_send_notify(). + * + * A reason code of xpMsgDelivered indicates that the message was delivered + * to the intended recipient and that they have acknowledged its receipt by + * calling xpc_received(). + * + * All other reason codes indicate failure. + * + * NOTE: The user defined function must be callable by an interrupt handler + * and thus cannot block. + */ +typedef void (*xpc_notify_func) (enum xp_retval reason, short partid, + int ch_number, void *key); + +/* + * The following is a registration entry. There is a global array of these, + * one per channel. It is used to record the connection registration made + * by the users of XPC.
As long as a registration entry exists, for any + * partition that comes up, XPC will attempt to establish a connection on + * that channel. Notification that a connection has been made will occur via + * the xpc_channel_func function. + * + * The 'func' field points to the function to call when asynchronous + * notification is required for such events as: a connection established/lost, + * or an incoming message received, or an error condition encountered. A + * non-NULL 'func' field indicates that there is an active registration for + * the channel. + */ +struct xpc_registration { + struct mutex mutex; + xpc_channel_func func; /* function to call */ + void *key; /* pointer to user's key */ + u16 nentries; /* #of msg entries in local msg queue */ + u16 entry_size; /* message queue's message entry size */ + u32 assigned_limit; /* limit on #of assigned kthreads */ + u32 idle_limit; /* limit on #of idle kthreads */ +} ____cacheline_aligned; + +#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL) + +/* the following are valid xpc_send() or xpc_send_notify() flags */ +#define XPC_WAIT 0 /* wait flag */ +#define XPC_NOWAIT 1 /* no wait flag */ + +struct xpc_interface { + void (*connect) (int); + void (*disconnect) (int); + enum xp_retval (*send) (short, int, u32, void *, u16); + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *); + void (*received) (short, int, void *); + enum xp_retval (*partid_to_nasids) (short, void *); +}; + +extern struct xpc_interface xpc_interface; + +extern void xpc_set_interface(void (*)(int), + void (*)(int), + enum xp_retval (*)(short, int, u32, void *, u16), + enum xp_retval (*)(short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*)(short, int, void *), + enum xp_retval (*)(short, void *)); +extern void xpc_clear_interface(void); + +extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16, + u16, u32, u32); +extern void xpc_disconnect(int); + +static inline enum
xp_retval +xpc_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + return xpc_interface.send(partid, ch_number, flags, payload, + payload_size); +} + +static inline enum xp_retval +xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + return xpc_interface.send_notify(partid, ch_number, flags, payload, + payload_size, func, key); +} + +/* + * Forward to the currently installed xpc_interface.received handler. The + * handler returns nothing, so there is no value to pass back to the caller. + */ +static inline void +xpc_received(short partid, int ch_number, void *payload) +{ + xpc_interface.received(partid, ch_number, payload); +} + +static inline enum xp_retval +xpc_partid_to_nasids(short partid, void *nasids) +{ + return xpc_interface.partid_to_nasids(partid, nasids); +} + +extern short xp_max_npartitions; +extern short xp_partition_id; +extern u8 xp_region_size; + +extern unsigned long (*xp_pa) (void *); +extern unsigned long (*xp_socket_pa) (unsigned long); +extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, + size_t); +extern int (*xp_cpu_to_nasid) (int); +extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long); +extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long); + +extern u64 xp_nofault_PIOR_target; +extern int xp_nofault_PIOR(void *); +extern int xp_error_PIOR(void); + +extern struct device *xp; +extern enum xp_retval xp_init_sn2(void); +extern enum xp_retval xp_init_uv(void); +extern void xp_exit_sn2(void); +extern void xp_exit_uv(void); + +#endif /* _DRIVERS_MISC_SGIXP_XP_H */ diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c new file mode 100644 index 00000000000..01be66d02ca --- /dev/null +++ b/drivers/misc/sgi-xp/xp_main.c @@ -0,0 +1,286 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */ + +/* + * Cross Partition (XP) base. + * + * XP provides a base from which its users can interact + * with XPC, yet not be dependent on XPC. + * + */ + +#include <linux/module.h> +#include <linux/device.h> +#include "xp.h" + +/* define the XP debug device structures to be used with dev_dbg() et al */ + +struct device_driver xp_dbg_name = { + .name = "xp" +}; + +struct device xp_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xp_dbg_name +}; + +struct device *xp = &xp_dbg_subname; + +/* max #of partitions possible */ +short xp_max_npartitions; +EXPORT_SYMBOL_GPL(xp_max_npartitions); + +short xp_partition_id; +EXPORT_SYMBOL_GPL(xp_partition_id); + +u8 xp_region_size; +EXPORT_SYMBOL_GPL(xp_region_size); + +unsigned long (*xp_pa) (void *addr); +EXPORT_SYMBOL_GPL(xp_pa); + +unsigned long (*xp_socket_pa) (unsigned long gpa); +EXPORT_SYMBOL_GPL(xp_socket_pa); + +enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa, + const unsigned long src_gpa, size_t len); +EXPORT_SYMBOL_GPL(xp_remote_memcpy); + +int (*xp_cpu_to_nasid) (int cpuid); +EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); + +enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_expand_memprotect); +enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_restrict_memprotect); + +/* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. + */ +struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; +EXPORT_SYMBOL_GPL(xpc_registrations); + +/* + * Initialize the XPC interface to indicate that XPC isn't loaded. 
+ */ +static enum xp_retval +xpc_notloaded(void) +{ + return xpNotLoaded; +} + +struct xpc_interface xpc_interface = { + (void (*)(int))xpc_notloaded, + (void (*)(int))xpc_notloaded, + (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded, + (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func, + void *))xpc_notloaded, + (void (*)(short, int, void *))xpc_notloaded, + (enum xp_retval(*)(short, void *))xpc_notloaded +}; +EXPORT_SYMBOL_GPL(xpc_interface); + +/* + * XPC calls this when it (the XPC module) has been loaded. + */ +void +xpc_set_interface(void (*connect) (int), + void (*disconnect) (int), + enum xp_retval (*send) (short, int, u32, void *, u16), + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*received) (short, int, void *), + enum xp_retval (*partid_to_nasids) (short, void *)) +{ + xpc_interface.connect = connect; + xpc_interface.disconnect = disconnect; + xpc_interface.send = send; + xpc_interface.send_notify = send_notify; + xpc_interface.received = received; + xpc_interface.partid_to_nasids = partid_to_nasids; +} +EXPORT_SYMBOL_GPL(xpc_set_interface); + +/* + * XPC calls this when it (the XPC module) is being unloaded. + */ +void +xpc_clear_interface(void) +{ + xpc_interface.connect = (void (*)(int))xpc_notloaded; + xpc_interface.disconnect = (void (*)(int))xpc_notloaded; + xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16)) + xpc_notloaded; + xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *, + u16, xpc_notify_func, + void *))xpc_notloaded; + xpc_interface.received = (void (*)(short, int, void *)) + xpc_notloaded; + xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *)) + xpc_notloaded; +} +EXPORT_SYMBOL_GPL(xpc_clear_interface); + +/* + * Register for automatic establishment of a channel connection whenever + * a partition comes up. + * + * Arguments: + * + * ch_number - channel # to register for connection. 
+ * func - function to call for asynchronous notification of channel + * state changes (i.e., connection, disconnection, error) and + * the arrival of incoming messages. + * key - pointer to optional user-defined value that gets passed back + * to the user on any callouts made to func. + * payload_size - size in bytes of the XPC message's payload area which + * contains a user-defined message. The user should make + * this large enough to hold their largest message. + * nentries - max #of XPC message entries a message queue can contain. + * The actual number, which is determined when a connection + * is established and may be less then requested, will be + * passed to the user via the xpConnected callout. + * assigned_limit - max number of kthreads allowed to be processing + * messages (per connection) at any given instant. + * idle_limit - max number of kthreads allowed to be idle at any given + * instant. + */ +enum xp_retval +xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, + u16 nentries, u32 assigned_limit, u32 idle_limit) +{ + struct xpc_registration *registration; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + DBUG_ON(payload_size == 0 || nentries == 0); + DBUG_ON(func == NULL); + DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); + + if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE) + return xpPayloadTooBig; + + registration = &xpc_registrations[ch_number]; + + if (mutex_lock_interruptible(®istration->mutex) != 0) + return xpInterrupted; + + /* if XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func != NULL) { + mutex_unlock(®istration->mutex); + return xpAlreadyRegistered; + } + + /* register the channel for connection */ + registration->entry_size = XPC_MSG_SIZE(payload_size); + registration->nentries = nentries; + registration->assigned_limit = assigned_limit; + registration->idle_limit = idle_limit; + registration->key = key; + registration->func = func; + + 
mutex_unlock(®istration->mutex); + + xpc_interface.connect(ch_number); + + return xpSuccess; +} +EXPORT_SYMBOL_GPL(xpc_connect); + +/* + * Remove the registration for automatic connection of the specified channel + * when a partition comes up. + * + * Before returning this xpc_disconnect() will wait for all connections on the + * specified channel have been closed/torndown. So the caller can be assured + * that they will not be receiving any more callouts from XPC to their + * function registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_disconnect(int ch_number) +{ + struct xpc_registration *registration; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + registration = &xpc_registrations[ch_number]; + + /* + * We've decided not to make this a down_interruptible(), since we + * figured XPC's users will just turn around and call xpc_disconnect() + * again anyways, so we might as well wait, if need be. + */ + mutex_lock(®istration->mutex); + + /* if !XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func == NULL) { + mutex_unlock(®istration->mutex); + return; + } + + /* remove the connection registration for the specified channel */ + registration->func = NULL; + registration->key = NULL; + registration->nentries = 0; + registration->entry_size = 0; + registration->assigned_limit = 0; + registration->idle_limit = 0; + + xpc_interface.disconnect(ch_number); + + mutex_unlock(®istration->mutex); + + return; +} +EXPORT_SYMBOL_GPL(xpc_disconnect); + +int __init +xp_init(void) +{ + enum xp_retval ret; + int ch_number; + + /* initialize the connection registration mutex */ + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) + mutex_init(&xpc_registrations[ch_number].mutex); + + if (is_shub()) + ret = xp_init_sn2(); + else if (is_uv()) + ret = xp_init_uv(); + else + ret = 0; + + if (ret != xpSuccess) + return ret; + + return 0; +} + +module_init(xp_init); + +void __exit 
+xp_exit(void) +{ + if (is_shub()) + xp_exit_sn2(); + else if (is_uv()) + xp_exit_uv(); +} + +module_exit(xp_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition (XP) base"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S new file mode 100644 index 00000000000..e38d4331942 --- /dev/null +++ b/drivers/misc/sgi-xp/xp_nofault.S @@ -0,0 +1,35 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * The xp_nofault_PIOR function takes a pointer to a remote PIO register + * and attempts to load and consume a value from it. This function + * will be registered as a nofault code block. In the event that the + * PIO read fails, the MCA handler will force the error to look + * corrected and vector to the xp_error_PIOR which will return an error. + * + * The definition of "consumption" and the time it takes for an MCA + * to surface is processor implementation specific. This code + * is sufficient on Itanium through the Montvale processor family. + * It may need to be adjusted for future processor implementations. 
+ * + * extern int xp_nofault_PIOR(void *remote_register); + */ + + .global xp_nofault_PIOR +xp_nofault_PIOR: + mov r8=r0 // Stage a success return value + ld8.acq r9=[r32];; // PIO Read the specified register + adds r9=1,r9;; // Add to force consumption + srlz.i;; // Allow time for MCA to surface + br.ret.sptk.many b0;; // Return success + + .global xp_error_PIOR +xp_error_PIOR: + mov r8=1 // Return value of 1 + br.ret.sptk.many b0;; // Return failure diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c new file mode 100644 index 00000000000..d8e463f8724 --- /dev/null +++ b/drivers/misc/sgi-xp/xp_sn2.c @@ -0,0 +1,190 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) sn2-based functions. + * + * Architecture specific implementation of common functions. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <asm/sn/bte.h> +#include <asm/sn/sn_sal.h> +#include "xp.h" + +/* + * The export of xp_nofault_PIOR needs to happen here since it is defined + * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is + * defined here. + */ +EXPORT_SYMBOL_GPL(xp_nofault_PIOR); + +u64 xp_nofault_PIOR_target; +EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); + +/* + * Register a nofault code region which performs a cross-partition PIO read. + * If the PIO read times out, the MCA handler will consume the error and + * return to a kernel-provided instruction to indicate an error. This PIO read + * exists because it is guaranteed to timeout if the destination is down + * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2, + * which unfortunately we have to work around). 
+ */ +static enum xp_retval +xp_register_nofault_code_sn2(void) +{ + int ret; + u64 func_addr; + u64 err_func_addr; + + func_addr = *(u64 *)xp_nofault_PIOR; + err_func_addr = *(u64 *)xp_error_PIOR; + ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr, + 1, 1); + if (ret != 0) { + dev_err(xp, "can't register nofault code, error=%d\n", ret); + return xpSalError; + } + /* + * Setup the nofault PIO read target. (There is no special reason why + * SH_IPI_ACCESS was selected.) + */ + if (is_shub1()) + xp_nofault_PIOR_target = SH1_IPI_ACCESS; + else if (is_shub2()) + xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + + return xpSuccess; +} + +static void +xp_unregister_nofault_code_sn2(void) +{ + u64 func_addr = *(u64 *)xp_nofault_PIOR; + u64 err_func_addr = *(u64 *)xp_error_PIOR; + + /* unregister the PIO read nofault code region */ + (void)sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 0); +} + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_sn2(void *addr) +{ + return __pa(addr); +} + +/* + * Convert a global physical to a socket physical address. + */ +static unsigned long +xp_socket_pa_sn2(unsigned long gpa) +{ + return gpa; +} + +/* + * Wrapper for bte_copy(). + * + * dst_pa - physical address of the destination of the transfer. + * src_pa - physical address of the source of the transfer. + * len - number of bytes to transfer from source to destination. + * + * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock. 
+ */ +static enum xp_retval +xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa, + size_t len) +{ + bte_result_t ret; + + ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (ret == BTE_SUCCESS) + return xpSuccess; + + if (is_shub2()) { + dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa=" + "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa, + src_pa, len); + } else { + dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx " + "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len); + } + + return xpBteCopyError; +} + +static int +xp_cpu_to_nasid_sn2(int cpuid) +{ + return cpuid_to_nasid(cpuid); +} + +static enum xp_retval +xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size) +{ + u64 nasid_array = 0; + int ret; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } + return xpSuccess; +} + +static enum xp_retval +xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size) +{ + u64 nasid_array = 0; + int ret; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } + return xpSuccess; +} + +enum xp_retval +xp_init_sn2(void) +{ + BUG_ON(!is_shub()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_SN2; + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; + + xp_pa = xp_pa_sn2; + xp_socket_pa = xp_socket_pa_sn2; + xp_remote_memcpy = xp_remote_memcpy_sn2; + xp_cpu_to_nasid = xp_cpu_to_nasid_sn2; + xp_expand_memprotect = xp_expand_memprotect_sn2; + xp_restrict_memprotect = xp_restrict_memprotect_sn2; + + return xp_register_nofault_code_sn2(); +} + +void +xp_exit_sn2(void) +{ + BUG_ON(!is_shub()); + + 
xp_unregister_nofault_code_sn2(); +} + diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c new file mode 100644 index 00000000000..a0d093274dc --- /dev/null +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -0,0 +1,171 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/device.h> +#include <asm/uv/uv_hub.h> +#if defined CONFIG_X86_64 +#include <asm/uv/bios.h> +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#include <asm/sn/sn_sal.h> +#endif +#include "../sgi-gru/grukservices.h" +#include "xp.h" + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_uv(void *addr) +{ + return uv_gpa(addr); +} + +/* + * Convert a global physical to socket physical address. 
+ */ +static unsigned long +xp_socket_pa_uv(unsigned long gpa) +{ + return uv_gpa_to_soc_phys_ram(gpa); +} + +static enum xp_retval +xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa)); + + BUG_ON(!uv_gpa_in_mmr_space(src_gpa)); + BUG_ON(len != 8); + + ret = gru_read_gpa(dst_va, src_gpa); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + + +static enum xp_retval +xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + + if (uv_gpa_in_mmr_space(src_gpa)) + return xp_remote_mmr_read(dst_gpa, src_gpa, len); + + ret = gru_copy_gpa(dst_gpa, src_gpa, len); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + +static int +xp_cpu_to_nasid_uv(int cpuid) +{ + /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? 
*/ + return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); +} + +static enum xp_retval +xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +static enum xp_retval +xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, + UV_MEMPROT_RESTRICT_ACCESS); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +enum xp_retval +xp_init_uv(void) +{ + BUG_ON(!is_uv()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_UV; + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; + + xp_pa = xp_pa_uv; + xp_socket_pa = xp_socket_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; + xp_cpu_to_nasid = xp_cpu_to_nasid_uv; + xp_expand_memprotect = xp_expand_memprotect_uv; + xp_restrict_memprotect = 
xp_restrict_memprotect_uv; + + return xpSuccess; +} + +void +xp_exit_uv(void) +{ + BUG_ON(!is_uv()); +} diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h new file mode 100644 index 00000000000..b94d5f76770 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc.h @@ -0,0 +1,1004 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) structures and macros. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XPC_H +#define _DRIVERS_MISC_SGIXP_XPC_H + +#include <linux/wait.h> +#include <linux/completion.h> +#include <linux/timer.h> +#include <linux/sched.h> +#include "xp.h" + +/* + * XPC Version numbers consist of a major and minor number. XPC can always + * talk to versions with same major #, and never talk to versions with a + * different major #. + */ +#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) +#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) + +/* define frequency of the heartbeat and frequency how often it's checked */ +#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ + +/* define the process name of HB checker and the CPU it is pinned to */ +#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" +#define XPC_HB_CHECK_CPU 0 + +/* define the process name of the discovery thread */ +#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" + +/* + * the reserved page + * + * SAL reserves one page of memory per partition for XPC. Though a full page + * in length (16384 bytes), its starting address is not page aligned, but it + * is cacheline aligned. 
The reserved page consists of the following: + * + * reserved page header + * + * The first two 64-byte cachelines of the reserved page contain the + * header (struct xpc_rsvd_page). Before SAL initialization has completed, + * SAL has set up the following fields of the reserved page header: + * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The + * other fields are set up by XPC. (xpc_rsvd_page points to the local + * partition's reserved page.) + * + * part_nasids mask + * mach_nasids mask + * + * SAL also sets up two bitmaps (or masks), one that reflects the actual + * nasids in this partition (part_nasids), and the other that reflects + * the actual nasids in the entire machine (mach_nasids). We're only + * interested in the even numbered nasids (which contain the processors + * and/or memory), so we only need half as many bits to represent the + * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure + * to either divide or multiply by 2. The part_nasids mask is located + * starting at the first cacheline following the reserved page header. The + * mach_nasids mask follows right after the part_nasids mask. The size in + * bytes of each mask is reflected by the reserved page header field + * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids + * and xpc_mach_nasids.) + * + * vars (ia64-sn2 only) + * vars part (ia64-sn2 only) + * + * Immediately following the mach_nasids mask are the XPC variables + * required by other partitions. First are those that are generic to all + * partitions (vars), followed on the next available cacheline by those + * which are partition specific (vars part). These are setup by XPC. + * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) + * + * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been + * initialized. 
+ */ +struct xpc_rsvd_page { + u64 SAL_signature; /* SAL: unique signature */ + u64 SAL_version; /* SAL: version */ + short SAL_partid; /* SAL: partition ID */ + short max_npartitions; /* value of XPC_MAX_PARTITIONS */ + u8 version; + u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + union { + struct { + unsigned long vars_pa; /* phys addr */ + } sn2; + struct { + unsigned long heartbeat_gpa; /* phys addr */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr */ + } uv; + } sn; + u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ +}; + +#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ + +/* + * Define the structures by which XPC variables can be exported to other + * partitions. (There are two: struct xpc_vars and struct xpc_vars_part) + */ + +/* + * The following structure describes the partition generic variables + * needed by other partitions in order to properly initialize. + * + * struct xpc_vars version number also applies to struct xpc_vars_part. + * Changes to either structure and/or related functionality should be + * reflected by incrementing either the major or minor version numbers + * of struct xpc_vars. + */ +struct xpc_vars_sn2 { + u8 version; + u64 heartbeat; + DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + u64 heartbeat_offline; /* if 0, heartbeat should be changing */ + int activate_IRQ_nasid; + int activate_IRQ_phys_cpuid; + unsigned long vars_part_pa; + unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */ + struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */ +}; + +#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */ + +/* + * The following structure describes the per partition specific variables. + * + * An array of these structures, one per partition, will be defined. 
As a + * partition becomes active XPC will copy the array entry corresponding to + * itself from that partition. It is desirable that the size of this structure + * evenly divides into a 128-byte cacheline, such that none of the entries in + * this array crosses a 128-byte cacheline boundary. As it is now, each entry + * occupies 64-bytes. + */ +struct xpc_vars_part_sn2 { + u64 magic; + + unsigned long openclose_args_pa; /* phys addr of open and close args */ + unsigned long GPs_pa; /* physical address of Get/Put values */ + + unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */ + + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ + + u8 nchannels; /* #of defined channels supported */ + + u8 reserved[23]; /* pad to a full 64 bytes */ +}; + +/* + * The vars_part MAGIC numbers play a part in the first contact protocol. + * + * MAGIC1 indicates that the per partition specific variables for a remote + * partition have been initialized by this partition. + * + * MAGIC2 indicates that this partition has pulled the remote partition's + * per partition variables that pertain to this partition. 
+ */ +#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ +#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ + +/* the reserved page sizes and offsets */ + +#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) +#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2)) + +#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \ + XPC_RP_HEADER_SIZE)) +#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs) +#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \ + (XPC_RP_MACH_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs)) + + +/* + * The following structure describes the partition's heartbeat info which + * will be periodically read by other partitions to determine whether this + * XPC is still 'alive'. + */ +struct xpc_heartbeat_uv { + unsigned long value; + unsigned long offline; /* if 0, heartbeat should be changing */ +}; + +/* + * Info pertinent to a GRU message queue using a watch list for irq generation. + */ +struct xpc_gru_mq_uv { + void *address; /* address of GRU message queue */ + unsigned int order; /* size of GRU message queue as a power of 2 */ + int irq; /* irq raised when message is received in mq */ + int mmr_blade; /* blade where watchlist was allocated from */ + unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */ + unsigned long mmr_value; /* value of irq mmr located on mmr_blade */ + int watchlist_num; /* number of watchlist allocated by BIOS */ + void *gru_mq_desc; /* opaque structure used by the GRU driver */ +}; + +/* + * The activate_mq is used to send/receive GRU messages that affect XPC's + * partition active state and channel state. This is uv only. 
+ */ +struct xpc_activate_mq_msghdr_uv { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + short partid; /* sender's partid */ + u8 act_state; /* sender's act_state at time msg sent */ + u8 type; /* message's type */ + unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */ +}; + +/* activate_mq defined message types */ +#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 + +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 + +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7 + +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9 + +struct xpc_activate_mq_msg_uv { + struct xpc_activate_mq_msghdr_uv hdr; +}; + +struct xpc_activate_mq_msg_activate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + unsigned long rp_gpa; + unsigned long heartbeat_gpa; + unsigned long activate_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_deactivate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closerequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closereply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +struct xpc_activate_mq_msg_chctl_openrequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short entry_size; /* size of notify_mq's GRU messages */ + short local_nentries; /* ??? Is this needed? What is? */ +}; + +struct xpc_activate_mq_msg_chctl_openreply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short remote_nentries; /* ??? Is this needed? What is? */ + short local_nentries; /* ??? Is this needed? What is? 
*/ + unsigned long notify_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_chctl_opencomplete_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +/* + * Functions registered by add_timer() or called by kernel_thread() only + * allow for a single 64-bit argument. The following macros can be used to + * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from + * the passed argument. + */ +#define XPC_PACK_ARGS(_arg1, _arg2) \ + ((((u64)_arg1) & 0xffffffff) | \ + ((((u64)_arg2) & 0xffffffff) << 32)) + +#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff) + +/* + * Define a Get/Put value pair (pointers) used with a message queue. + */ +struct xpc_gp_sn2 { + s64 get; /* Get value */ + s64 put; /* Put value */ +}; + +#define XPC_GP_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS) + +/* + * Define a structure that contains arguments associated with opening and + * closing a channel. + */ +struct xpc_openclose_args { + u16 reason; /* reason why channel is closing */ + u16 entry_size; /* sizeof each message entry */ + u16 remote_nentries; /* #of message entries in remote msg queue */ + u16 local_nentries; /* #of message entries in local msg queue */ + unsigned long local_msgqueue_pa; /* phys addr of local message queue */ +}; + +#define XPC_OPENCLOSE_ARGS_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \ + XPC_MAX_NCHANNELS) + + +/* + * Structures to define a fifo singly-linked list. + */ + +struct xpc_fifo_entry_uv { + struct xpc_fifo_entry_uv *next; +}; + +struct xpc_fifo_head_uv { + struct xpc_fifo_entry_uv *first; + struct xpc_fifo_entry_uv *last; + spinlock_t lock; + int n_entries; +}; + +/* + * Define a sn2 styled message. + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). 
+ * + * The size of a message entry (within a message queue) must be a 128-byte + * cacheline sized multiple in order to facilitate the BTE transfer of messages + * from one message queue to another. + */ +struct xpc_msg_sn2 { + u8 flags; /* FOR XPC INTERNAL USE ONLY */ + u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ + s64 number; /* FOR XPC INTERNAL USE ONLY */ + + u64 payload; /* user defined portion of message */ +}; + +/* struct xpc_msg_sn2 flags */ + +#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */ +#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */ +#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */ + +/* + * The format of a uv XPC notify_mq GRU message is as follows: + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message (payload and header) sent via the GRU must be either 1 + * or 2 GRU_CACHE_LINE_BYTES in length. + */ + +struct xpc_notify_mq_msghdr_uv { + union { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */ + } u; + short partid; /* FOR XPC INTERNAL USE ONLY */ + u8 ch_number; /* FOR XPC INTERNAL USE ONLY */ + u8 size; /* FOR XPC INTERNAL USE ONLY */ + unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */ +}; + +struct xpc_notify_mq_msg_uv { + struct xpc_notify_mq_msghdr_uv hdr; + unsigned long payload; +}; + +/* + * Define sn2's notify entry. + * + * This is used to notify a message's sender that their message was received + * and consumed by the intended recipient. 
+ */ +struct xpc_notify_sn2 { + u8 type; /* type of notification */ + + /* the following two fields are only used if type == XPC_N_CALL */ + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* struct xpc_notify_sn2 type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ + +/* + * Define uv's version of the notify entry. It additionally is used to allocate + * a msg slot on the remote partition into which is copied a sent message. + */ +struct xpc_send_msg_slot_uv { + struct xpc_fifo_entry_uv next; + unsigned int msg_slot_number; + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* + * Define the structure that manages all the stuff required by a channel. In + * particular, they are used to manage the messages sent across the channel. + * + * This structure is private to a partition, and is NOT shared across the + * partition boundary. + * + * There is an array of these structures for each remote partition. It is + * allocated at the time a partition becomes active. The array contains one + * of these structures for each potential channel connection to that partition. + */ + +/* + * The following is sn2 only. + * + * Each channel structure manages two message queues (circular buffers). + * They are allocated at the time a channel connection is made. One of + * these message queues (local_msgqueue) holds the locally created messages + * that are destined for the remote partition. The other of these message + * queues (remote_msgqueue) is a locally cached copy of the remote partition's + * own local_msgqueue. + * + * The following is a description of the Get/Put pointers used to manage these + * two message queues. Consider the local_msgqueue to be on one partition + * and the remote_msgqueue to be its cached copy on another partition. A + * description of what each of the lettered areas contains is included. 
+ * + * + * local_msgqueue remote_msgqueue + * + * |/////////| |/////////| + * w_remote_GP.get --> +---------+ |/////////| + * | F | |/////////| + * remote_GP.get --> +---------+ +---------+ <-- local_GP->get + * | | | | + * | | | E | + * | | | | + * | | +---------+ <-- w_local_GP.get + * | B | |/////////| + * | | |////D////| + * | | |/////////| + * | | +---------+ <-- w_remote_GP.put + * | | |////C////| + * local_GP->put --> +---------+ +---------+ <-- remote_GP.put + * | | |/////////| + * | A | |/////////| + * | | |/////////| + * w_local_GP.put --> +---------+ |/////////| + * |/////////| |/////////| + * + * + * ( remote_GP.[get|put] are cached copies of the remote + * partition's local_GP->[get|put], and thus their values can + * lag behind their counterparts on the remote partition. ) + * + * + * A - Messages that have been allocated, but have not yet been sent to the + * remote partition. + * + * B - Messages that have been sent, but have not yet been acknowledged by the + * remote partition as having been received. + * + * C - Area that needs to be prepared for the copying of sent messages, by + * the clearing of the message flags of any previously received messages. + * + * D - Area into which sent messages are to be copied from the remote + * partition's local_msgqueue and then delivered to their intended + * recipients. [ To allow for a multi-message copy, another pointer + * (next_msg_to_pull) has been added to keep track of the next message + * number needing to be copied (pulled). It chases after w_remote_GP.put. + * Any messages lying between w_local_GP.get and next_msg_to_pull have + * been copied and are ready to be delivered. ] + * + * E - Messages that have been copied and delivered, but have not yet been + * acknowledged by the recipient as having been received. + * + * F - Messages that have been acknowledged, but XPC has not yet notified the + * sender that the message was received by its intended recipient. 
+ * This is also an area that needs to be prepared for the allocating of + * new messages, by the clearing of the message flags of the acknowledged + * messages. + */ + +struct xpc_channel_sn2 { + struct xpc_openclose_args *local_openclose_args; /* args passed on */ + /* opening or closing of channel */ + + void *local_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *local_msgqueue; /* local message queue */ + void *remote_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */ + /* partition's local message queue */ + unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */ + /* local message queue */ + + struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */ + + /* various flavors of local and remote Get/Put values */ + + struct xpc_gp_sn2 *local_GP; /* local Get/Put values */ + struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */ + struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */ + struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */ + s64 next_msg_to_pull; /* Put value of next msg to pull */ + + struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ +}; + +struct xpc_channel_uv { + void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */ + /* gru mq descriptor */ + + struct xpc_send_msg_slot_uv *send_msg_slots; + void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */ + /* structure plus the user's payload */ + + struct xpc_fifo_head_uv msg_slot_free_list; + struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */ +}; + +struct xpc_channel { + short partid; /* ID of remote partition connected */ + spinlock_t lock; /* lock for updating this structure */ + unsigned int flags; /* general flags */ + + enum xp_retval reason; /* reason why channel is disconnect'g */ + int reason_line; /* line# disconnect initiated from */ + + u16 number; /* channel # */ + + u16 
entry_size; /* sizeof each msg entry */ + u16 local_nentries; /* #of msg entries in local msg queue */ + u16 remote_nentries; /* #of msg entries in remote msg queue */ + + atomic_t references; /* #of external references to queues */ + + atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ + wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + + u8 delayed_chctl_flags; /* chctl flags received, but delayed */ + /* action until channel disconnected */ + + atomic_t n_to_notify; /* #of msg senders to notify */ + + xpc_channel_func func; /* user's channel function */ + void *key; /* pointer to user's key */ + + struct completion wdisconnect_wait; /* wait for channel disconnect */ + + /* kthread management related fields */ + + atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ + u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ + atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ + u32 kthreads_idle_limit; /* limit on #of kthreads idle */ + atomic_t kthreads_active; /* #of kthreads actively working */ + + wait_queue_head_t idle_wq; /* idle kthread wait queue */ + + union { + struct xpc_channel_sn2 sn2; + struct xpc_channel_uv uv; + } sn; + +} ____cacheline_aligned; + +/* struct xpc_channel flags */ + +#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ + +#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ +#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ +#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ + +#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000200 /* 
connected callout completed */ +#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00040000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00080000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */ + +/* + * The channel control flags (chctl) union consists of a 64-bit variable which + * is divided up into eight bytes, ordered from right to left. Byte zero + * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte + * can have one or more of the chctl flags set in it. 
+ */ + +union xpc_channel_ctl_flags { + u64 all_flags; + u8 flags[XPC_MAX_NCHANNELS]; +}; + +/* chctl flags */ +#define XPC_CHCTL_CLOSEREQUEST 0x01 +#define XPC_CHCTL_CLOSEREPLY 0x02 +#define XPC_CHCTL_OPENREQUEST 0x04 +#define XPC_CHCTL_OPENREPLY 0x08 +#define XPC_CHCTL_OPENCOMPLETE 0x10 +#define XPC_CHCTL_MSGREQUEST 0x20 + +#define XPC_OPENCLOSE_CHCTL_FLAGS \ + (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \ + XPC_CHCTL_OPENCOMPLETE) +#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST + +static inline int +xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) + return 1; + } + return 0; +} + +static inline int +xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + return 1; + } + return 0; +} + +/* + * Manage channels on a partition basis. There is one of these structures + * for each partition (a partition will never utilize the structure that + * represents itself). 
+ */ + +struct xpc_partition_sn2 { + unsigned long remote_amos_page_pa; /* paddr of partition's amos page */ + int activate_IRQ_nasid; /* active partition's act/deact nasid */ + int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */ + + unsigned long remote_vars_pa; /* phys addr of partition's vars */ + unsigned long remote_vars_part_pa; /* paddr of partition's vars part */ + u8 remote_vars_version; /* version# of partition's vars */ + + void *local_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */ + void *remote_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */ + /* Get/Put values */ + unsigned long remote_GPs_pa; /* phys addr of remote partition's local */ + /* Get/Put values */ + + void *local_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *local_openclose_args; /* local's args */ + unsigned long remote_openclose_args_pa; /* phys addr of remote's args */ + + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ + char notify_IRQ_owner[8]; /* notify IRQ's owner's name */ + + struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */ + struct amo *local_chctl_amo_va; /* address of chctl flags' amo */ + + struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */ +}; + +struct xpc_partition_uv { + unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ + struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ + /* partition's heartbeat */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */ + /* activate mq's gru mq */ + /* descriptor */ + void *cached_activate_gru_mq_desc; /* cached copy of partition's */ + /* activate mq's gru mq descriptor */ + struct mutex cached_activate_gru_mq_desc_mutex; + spinlock_t flags_lock; /* protect updating of flags 
*/ + unsigned int flags; /* general flags */ + u8 remote_act_state; /* remote partition's act_state */ + u8 act_state_req; /* act_state request from remote partition */ + enum xp_retval reason; /* reason for deactivate act_state request */ +}; + +/* struct xpc_partition_uv flags */ + +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 +#define XPC_P_ENGAGED_UV 0x00000002 + +/* struct xpc_partition_uv act_state change requests */ + +#define XPC_P_ASR_ACTIVATE_UV 0x01 +#define XPC_P_ASR_REACTIVATE_UV 0x02 +#define XPC_P_ASR_DEACTIVATE_UV 0x03 + +struct xpc_partition { + + /* XPC HB infrastructure */ + + u8 remote_rp_version; /* version# of partition's rsvd pg */ + unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */ + unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */ + u64 last_heartbeat; /* HB at last read */ + u32 activate_IRQ_rcvd; /* IRQs since activation */ + spinlock_t act_lock; /* protect updating of act_state */ + u8 act_state; /* from XPC HB viewpoint */ + enum xp_retval reason; /* reason partition is deactivating */ + int reason_line; /* line# deactivation initiated from */ + + unsigned long disengage_timeout; /* timeout in jiffies */ + struct timer_list disengage_timer; + + /* XPC infrastructure referencing and teardown control */ + + u8 setup_state; /* infrastructure setup state */ + wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ + atomic_t references; /* #of references to infrastructure */ + + u8 nchannels; /* #of defined channels supported */ + atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ + atomic_t nchannels_engaged; /* #of channels engaged with remote part */ + struct xpc_channel *channels; /* array of channel structures */ + + /* fields used for managing channel availability and activity */ + + union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */ + spinlock_t chctl_lock; /* chctl flags lock */ + + void *remote_openclose_args_base; /* base 
address of kmalloc'd space */ + struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ + /* args */ + + /* channel manager related fields */ + + atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ + wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ + + union { + struct xpc_partition_sn2 sn2; + struct xpc_partition_uv uv; + } sn; + +} ____cacheline_aligned; + +struct xpc_arch_operations { + int (*setup_partitions) (void); + void (*teardown_partitions) (void); + void (*process_activate_IRQ_rcvd) (void); + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *); + int (*setup_rsvd_page) (struct xpc_rsvd_page *); + + void (*allow_hb) (short); + void (*disallow_hb) (short); + void (*disallow_all_hbs) (void); + void (*increment_heartbeat) (void); + void (*offline_heartbeat) (void); + void (*online_heartbeat) (void); + void (*heartbeat_init) (void); + void (*heartbeat_exit) (void); + enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *); + + void (*request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); + void (*request_partition_reactivation) (struct xpc_partition *); + void (*request_partition_deactivation) (struct xpc_partition *); + void (*cancel_partition_deactivation_request) (struct xpc_partition *); + enum xp_retval (*setup_ch_structures) (struct xpc_partition *); + void (*teardown_ch_structures) (struct xpc_partition *); + + enum xp_retval (*make_first_contact) (struct xpc_partition *); + + u64 (*get_chctl_all_flags) (struct xpc_partition *); + void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *); + void (*process_msg_chctl_flags) (struct 
xpc_partition *, int); + + enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + + enum xp_retval (*setup_msg_structures) (struct xpc_channel *); + void (*teardown_msg_structures) (struct xpc_channel *); + + void (*indicate_partition_engaged) (struct xpc_partition *); + void (*indicate_partition_disengaged) (struct xpc_partition *); + void (*assume_partition_disengaged) (short); + int (*partition_engaged) (short); + int (*any_partition_engaged) (void); + + int (*n_of_deliverable_payloads) (struct xpc_channel *); + enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); + void *(*get_deliverable_payload) (struct xpc_channel *); + void (*received_payload) (struct xpc_channel *, void *); + void (*notify_senders_of_disconnect) (struct xpc_channel *); +}; + +/* struct xpc_partition act_state values (for XPC HB) */ + +#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ +#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */ +#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */ +#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */ +#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */ + +#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ + xpc_deactivate_partition(__LINE__, (_p), (_reason)) + +/* struct xpc_partition setup_state values */ + +#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */ +#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */ +#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ +#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */ + +/* + * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the + * following interval #of seconds before checking for dropped notify IRQs. + * These can occur whenever an IRQ's associated amo write doesn't complete + * until after the IRQ was received. 
+ */ +#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ) + +/* number of seconds to wait for other partitions to disengage */ +#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90 + +/* interval in seconds to print 'waiting deactivation' messages */ +#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10 + +#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0])) + +/* found in xp_main.c */ +extern struct xpc_registration xpc_registrations[]; + +/* found in xpc_main.c */ +extern struct device *xpc_part; +extern struct device *xpc_chan; +extern struct xpc_arch_operations xpc_arch_ops; +extern int xpc_disengage_timelimit; +extern int xpc_disengage_timedout; +extern int xpc_activate_IRQ_rcvd; +extern spinlock_t xpc_activate_IRQ_rcvd_lock; +extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); +extern void xpc_activate_partition(struct xpc_partition *); +extern void xpc_activate_kthreads(struct xpc_channel *, int); +extern void xpc_create_kthreads(struct xpc_channel *, int, int); +extern void xpc_disconnect_wait(int); + +/* found in xpc_sn2.c */ +extern int xpc_init_sn2(void); +extern void xpc_exit_sn2(void); + +/* found in xpc_uv.c */ +extern int xpc_init_uv(void); +extern void xpc_exit_uv(void); + +/* found in xpc_partition.c */ +extern int xpc_exiting; +extern int xpc_nasid_mask_nlongs; +extern struct xpc_rsvd_page *xpc_rsvd_page; +extern unsigned long *xpc_mach_nasids; +extern struct xpc_partition *xpc_partitions; +extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **); +extern int xpc_setup_rsvd_page(void); +extern void xpc_teardown_rsvd_page(void); +extern int xpc_identify_activate_IRQ_sender(void); +extern int xpc_partition_disengaged(struct xpc_partition *); +extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *); +extern void xpc_mark_partition_inactive(struct xpc_partition *); +extern void xpc_discovery(void); +extern enum xp_retval xpc_get_remote_rp(int, unsigned long *, + struct 
xpc_rsvd_page *, + unsigned long *); +extern void xpc_deactivate_partition(const int, struct xpc_partition *, + enum xp_retval); +extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); + +/* found in xpc_channel.c */ +extern void xpc_initiate_connect(int); +extern void xpc_initiate_disconnect(int); +extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *); +extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16); +extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16, + xpc_notify_func, void *); +extern void xpc_initiate_received(short, int, void *); +extern void xpc_process_sent_chctl_flags(struct xpc_partition *); +extern void xpc_connected_callout(struct xpc_channel *); +extern void xpc_deliver_payload(struct xpc_channel *); +extern void xpc_disconnect_channel(const int, struct xpc_channel *, + enum xp_retval, unsigned long *); +extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); +extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); + +static inline void +xpc_wakeup_channel_mgr(struct xpc_partition *part) +{ + if (atomic_inc_return(&part->channel_mgr_requests) == 1) + wake_up(&part->channel_mgr_wq); +} + +/* + * These next two inlines are used to keep us from tearing down a channel's + * msg queues while a thread may be referencing them. + */ +static inline void +xpc_msgqueue_ref(struct xpc_channel *ch) +{ + atomic_inc(&ch->references); +} + +static inline void +xpc_msgqueue_deref(struct xpc_channel *ch) +{ + s32 refs = atomic_dec_return(&ch->references); + + DBUG_ON(refs < 0); + if (refs == 0) + xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); +} + +#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ + xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) + +/* + * These two inlines are used to keep us from tearing down a partition's + * setup infrastructure while a thread may be referencing it. 
+ */ +static inline void +xpc_part_deref(struct xpc_partition *part) +{ + s32 refs = atomic_dec_return(&part->references); + + DBUG_ON(refs < 0); + if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN) + wake_up(&part->teardown_wq); +} + +static inline int +xpc_part_ref(struct xpc_partition *part) +{ + int setup; + + atomic_inc(&part->references); + setup = (part->setup_state == XPC_P_SS_SETUP); + if (!setup) + xpc_part_deref(part); + + return setup; +} + +/* + * The following macro is to be used for the setting of the reason and + * reason_line fields in both the struct xpc_channel and struct xpc_partition + * structures. + */ +#define XPC_SET_REASON(_p, _reason, _line) \ + { \ + (_p)->reason = _reason; \ + (_p)->reason_line = _line; \ + } + +#endif /* _DRIVERS_MISC_SGIXP_XPC_H */ diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c new file mode 100644 index 00000000000..652593fc486 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_channel.c @@ -0,0 +1,1011 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) channel support. + * + * This is the part of XPC that manages the channels and + * sends/receives messages across them to/from other partitions. + * + */ + +#include <linux/device.h> +#include "xpc.h" + +/* + * Process a connect message from a remote partition. + * + * Note: xpc_process_connect() is expecting to be called with the + * spin_lock_irqsave held and will leave it locked upon return. 
+ */ +static void +xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + enum xp_retval ret; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_ROPENREQUEST)) { + /* nothing more to do for now */ + return; + } + DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); + + if (!(ch->flags & XPC_C_SETUP)) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + ret = xpc_arch_ops.setup_msg_structures(ch); + spin_lock_irqsave(&ch->lock, *irq_flags); + + if (ret != xpSuccess) + XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + else + ch->flags |= XPC_C_SETUP; + + if (ch->flags & XPC_C_DISCONNECTING) + return; + } + + if (!(ch->flags & XPC_C_OPENREPLY)) { + ch->flags |= XPC_C_OPENREPLY; + xpc_arch_ops.send_chctl_openreply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENREPLY)) + return; + + if (!(ch->flags & XPC_C_OPENCOMPLETE)) { + ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); + xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENCOMPLETE)) + return; + + dev_info(xpc_chan, "channel %d to partition %d connected\n", + ch->number, ch->partid); + + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ +} + +/* + * spin_lock_irqsave() is expected to be held on entry. 
+ */ +static void +xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED); + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_DISCONNECTING)) + return; + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + /* make sure all activity has settled down first */ + + if (atomic_read(&ch->kthreads_assigned) > 0 || + atomic_read(&ch->references) > 0) { + return; + } + DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + /* can't proceed until the other side disengages from us */ + if (xpc_arch_ops.partition_engaged(ch->partid)) + return; + + } else { + + /* as long as the other side is up do the full protocol */ + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) + return; + + if (!(ch->flags & XPC_C_CLOSEREPLY)) { + ch->flags |= XPC_C_CLOSEREPLY; + xpc_arch_ops.send_chctl_closereply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_RCLOSEREPLY)) + return; + } + + /* wake those waiting for notify completion */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* we do callout while holding ch->lock, callout can't block */ + xpc_arch_ops.notify_senders_of_disconnect(ch); + } + + /* both sides are disconnected now */ + + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_disconnect_callout(ch, xpDisconnected); + spin_lock_irqsave(&ch->lock, *irq_flags); + } + + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + + /* it's now safe to free the channel's message queues */ + xpc_arch_ops.teardown_msg_structures(ch); + + ch->func = NULL; + ch->key = NULL; + ch->entry_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + /* + * Mark the channel disconnected and clear all other flags, including + * 
XPC_C_SETUP (because of call to + * xpc_arch_ops.teardown_msg_structures()) but not including + * XPC_C_WDISCONNECT (if it was set). + */ + ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); + + atomic_dec(&part->nchannels_active); + + if (channel_was_connected) { + dev_info(xpc_chan, "channel %d to partition %d disconnected, " + "reason=%d\n", ch->number, ch->partid, ch->reason); + } + + if (ch->flags & XPC_C_WDISCONNECT) { + /* we won't lose the CPU since we're holding ch->lock */ + complete(&ch->wdisconnect_wait); + } else if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + /* time to take action on any delayed chctl flags */ + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + } + ch->delayed_chctl_flags = 0; + } +} + +/* + * Process a change in the channel's remote connection state. + */ +static void +xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, + u8 chctl_flags) +{ + unsigned long irq_flags; + struct xpc_openclose_args *args = + &part->remote_openclose_args[ch_number]; + struct xpc_channel *ch = &part->channels[ch_number]; + enum xp_retval reason; + enum xp_retval ret; + int create_kthread = 0; + + spin_lock_irqsave(&ch->lock, irq_flags); + +again: + + if ((ch->flags & XPC_C_DISCONNECTED) && + (ch->flags & XPC_C_WDISCONNECT)) { + /* + * Delay processing chctl flags until thread waiting disconnect + * has had a chance to see that the channel is disconnected. + */ + ch->delayed_chctl_flags |= chctl_flags; + goto out; + } + + if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received " + "from partid=%d, channel=%d\n", args->reason, + ch->partid, ch->number); + + /* + * If RCLOSEREQUEST is set, we're probably waiting for + * RCLOSEREPLY. We should find it and a ROPENREQUEST packed + * with this RCLOSEREQUEST in the chctl_flags. 
+ */ + + if (ch->flags & XPC_C_RCLOSEREQUEST) { + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); + DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); + + DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY)); + chctl_flags &= ~XPC_CHCTL_CLOSEREPLY; + ch->flags |= XPC_C_RCLOSEREPLY; + + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + goto again; + } + + if (ch->flags & XPC_C_DISCONNECTED) { + if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_OPENREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREQUEST; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); + } + + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | + XPC_CHCTL_OPENCOMPLETE); + + /* + * The meaningful CLOSEREQUEST connection state fields are: + * reason = reason connection is to be closed + */ + + ch->flags |= XPC_C_RCLOSEREQUEST; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + reason = args->reason; + if (reason <= xpSuccess || reason > xpUnknownReason) + reason = xpUnknownReason; + else if (reason == xpUnregistering) + reason = xpOtherUnregistering; + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); + goto out; + } + + xpc_process_disconnect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_CLOSEREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid=" + "%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & XPC_C_DISCONNECTED) { + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + if (!(ch->flags 
& XPC_C_RCLOSEREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_CLOSEREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREPLY; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + ch->flags |= XPC_C_RCLOSEREPLY; + + if (ch->flags & XPC_C_CLOSEREPLY) { + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + } + + if (chctl_flags & XPC_CHCTL_OPENREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, " + "local_nentries=%d) received from partid=%d, " + "channel=%d\n", args->entry_size, args->local_nentries, + ch->partid, ch->number); + + if (part->act_state == XPC_P_AS_DEACTIVATING || + (ch->flags & XPC_C_ROPENREQUEST)) { + goto out; + } + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { + ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; + goto out; + } + DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | + XPC_C_OPENREQUEST))); + DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_OPENREPLY | XPC_C_CONNECTED)); + + /* + * The meaningful OPENREQUEST connection state fields are: + * entry_size = size of channel's messages in bytes + * local_nentries = remote partition's local_nentries + */ + if (args->entry_size == 0 || args->local_nentries == 0) { + /* assume OPENREQUEST was delayed by mistake */ + goto out; + } + + ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); + ch->remote_nentries = args->local_nentries; + + if (ch->flags & XPC_C_OPENREQUEST) { + if (args->entry_size != ch->entry_size) { + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + goto out; + } + } else { + ch->entry_size = args->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa=" + 
"0x%lx, local_nentries=%d, remote_nentries=%d) " + "received from partid=%d, channel=%d\n", + args->local_msgqueue_pa, args->local_nentries, + args->remote_nentries, ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + + /* + * The meaningful OPENREPLY connection state fields are: + * local_msgqueue_pa = physical address of remote + * partition's local_msgqueue + * local_nentries = remote partition's local_nentries + * remote_nentries = remote partition's remote_nentries + */ + DBUG_ON(args->local_msgqueue_pa == 0); + DBUG_ON(args->local_nentries == 0); + DBUG_ON(args->remote_nentries == 0); + + ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, + args->local_msgqueue_pa); + if (ret != xpSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); + goto out; + } + ch->flags |= XPC_C_ROPENREPLY; + + if (args->local_nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "remote_nentries=%d, old remote_nentries=%d, " + "partid=%d, channel=%d\n", + args->local_nentries, ch->remote_nentries, + ch->partid, ch->number); + + ch->remote_nentries = args->local_nentries; + } + if (args->remote_nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "local_nentries=%d, old local_nentries=%d, " + "partid=%d, channel=%d\n", + args->remote_nentries, ch->local_nentries, + ch->partid, ch->number); + + ch->local_nentries = args->remote_nentries; + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST) || + 
!(ch->flags & XPC_C_OPENREPLY)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + ch->flags |= XPC_C_ROPENCOMPLETE; + + xpc_process_connect(ch, &irq_flags); + create_kthread = 1; + } + +out: + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (create_kthread) + xpc_create_kthreads(ch, 1, 0); +} + +/* + * Attempt to establish a channel connection to a remote partition. + */ +static enum xp_retval +xpc_connect_channel(struct xpc_channel *ch) +{ + unsigned long irq_flags; + struct xpc_registration *registration = &xpc_registrations[ch->number]; + + if (mutex_trylock(®istration->mutex) == 0) + return xpRetry; + + if (!XPC_CHANNEL_REGISTERED(ch->number)) { + mutex_unlock(®istration->mutex); + return xpUnregistered; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + mutex_unlock(®istration->mutex); + return ch->reason; + } + + /* add info from the channel connect registration to the channel */ + + ch->kthreads_assigned_limit = registration->assigned_limit; + ch->kthreads_idle_limit = registration->idle_limit; + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); + DBUG_ON(atomic_read(&ch->kthreads_active) != 0); + + ch->func = registration->func; + DBUG_ON(registration->func == NULL); + ch->key = registration->key; + + ch->local_nentries = registration->nentries; + + if (ch->flags & XPC_C_ROPENREQUEST) { + if (registration->entry_size != ch->entry_size) { + /* the local and remote sides aren't the same */ + + /* + * Because XPC_DISCONNECT_CHANNEL() can block we're + * forced to up the registration sema before we unlock + * the channel lock. 
But that's okay here because we're + * done with the part that required the registration + * sema. XPC_DISCONNECT_CHANNEL() requires that the + * channel lock be locked and will unlock and relock + * the channel lock as needed. + */ + mutex_unlock(®istration->mutex); + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpUnequalMsgSizes; + } + } else { + ch->entry_size = registration->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&xpc_partitions[ch->partid].nchannels_active); + } + + mutex_unlock(®istration->mutex); + + /* initiate the connection */ + + ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); + xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); + + xpc_process_connect(ch, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpSuccess; +} + +void +xpc_process_sent_chctl_flags(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + struct xpc_channel *ch; + int ch_number; + u32 ch_flags; + + chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); + + /* + * Initiate channel connections for registered channels. + * + * For each connected channel that has pending messages activate idle + * kthreads and/or create new kthreads as needed. + */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + /* + * Process any open or close related chctl flags, and then deal + * with connecting or disconnecting the channel as required. 
+ */ + + if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) { + xpc_process_openclose_chctl_flags(part, ch_number, + chctl.flags[ch_number]); + } + + ch_flags = ch->flags; /* need an atomic snapshot of flags */ + + if (ch_flags & XPC_C_DISCONNECTING) { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_disconnect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + continue; + } + + if (part->act_state == XPC_P_AS_DEACTIVATING) + continue; + + if (!(ch_flags & XPC_C_CONNECTED)) { + if (!(ch_flags & XPC_C_OPENREQUEST)) { + DBUG_ON(ch_flags & XPC_C_SETUP); + (void)xpc_connect_channel(ch); + } + continue; + } + + /* + * Process any message related chctl flags, this may involve + * the activation of kthreads to deliver any pending messages + * sent from the other partition. + */ + + if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + xpc_arch_ops.process_msg_chctl_flags(part, ch_number); + } +} + +/* + * XPC's heartbeat code calls this function to inform XPC that a partition is + * going down. XPC responds by tearing down the XPartition Communication + * infrastructure used for the just downed partition. + * + * XPC's heartbeat code will never call this function and xpc_partition_up() + * at the same time. Nor will it ever make multiple calls to either function + * at the same time. 
+ */ +void +xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason) +{ + unsigned long irq_flags; + int ch_number; + struct xpc_channel *ch; + + dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", + XPC_PARTID(part), reason); + + if (!xpc_part_ref(part)) { + /* infrastructure for this partition isn't currently set up */ + return; + } + + /* disconnect channels associated with the partition going down */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + xpc_msgqueue_ref(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_msgqueue_deref(ch); + } + + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); +} + +/* + * Called by XP at the time of channel connection registration to cause + * XPC to establish connections to all currently active partitions. + */ +void +xpc_initiate_connect(int ch_number) +{ + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + + /* + * Initiate the establishment of a connection on the + * newly registered channel to the remote partition. 
+ */ + xpc_wakeup_channel_mgr(part); + xpc_part_deref(part); + } + } +} + +void +xpc_connected_callout(struct xpc_channel *ch) +{ + /* let the registerer know that a connection has been established */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + ch->func(xpConnected, ch->partid, ch->number, + (void *)(u64)ch->local_nentries, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + } +} + +/* + * Called by XP at the time of channel connection unregistration to cause + * XPC to teardown all current connections for the specified channel. + * + * Before returning xpc_initiate_disconnect() will wait until all connections + * on the specified channel have been closed/torndown. So the caller can be + * assured that they will not be receiving any more callouts from XPC to the + * function they registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_initiate_disconnect(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + /* initiate the channel disconnect for every active partition */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + + if (!(ch->flags & XPC_C_DISCONNECTED)) { + ch->flags |= XPC_C_WDISCONNECT; + + XPC_DISCONNECT_CHANNEL(ch, xpUnregistering, + &irq_flags); + } + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + } + } + + xpc_disconnect_wait(ch_number); +} + +/* + * To disconnect a channel, and reflect it back to all who may be waiting. 
+ * + * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by + * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by + * xpc_disconnect_wait(). + * + * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. + */ +void +xpc_disconnect_channel(const int line, struct xpc_channel *ch, + enum xp_retval reason, unsigned long *irq_flags) +{ + u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED); + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + return; + + DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); + + dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", + reason, line, ch->partid, ch->number); + + XPC_SET_REASON(ch, reason, line); + + ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); + /* some of these may not have been set */ + ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | + XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_CONNECTING | XPC_C_CONNECTED); + + xpc_arch_ops.send_chctl_closerequest(ch, irq_flags); + + if (channel_was_connected) + ch->flags |= XPC_C_WASCONNECTED; + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + /* wake all idle kthreads so they can exit */ + if (atomic_read(&ch->kthreads_idle) > 0) { + wake_up_all(&ch->idle_wq); + + } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + /* start a kthread that will do the xpDisconnecting callout */ + xpc_create_kthreads(ch, 1, 1); + } + + /* wake those waiting to allocate an entry from the local msg queue */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + + spin_lock_irqsave(&ch->lock, *irq_flags); +} + +void +xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason) +{ + /* + * Let the channel's registerer know that the channel is being + * disconnected. We don't want to do this if the registerer was never + * informed of a connection being made. 
+ */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + + ch->func(reason, ch->partid, ch->number, NULL, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + } +} + +/* + * Wait for a message entry to become available for the specified channel, + * but don't wait any longer than 1 jiffy. + */ +enum xp_retval +xpc_allocate_msg_wait(struct xpc_channel *ch) +{ + enum xp_retval ret; + + if (ch->flags & XPC_C_DISCONNECTING) { + DBUG_ON(ch->reason == xpInterrupted); + return ch->reason; + } + + atomic_inc(&ch->n_on_msg_allocate_wq); + ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1); + atomic_dec(&ch->n_on_msg_allocate_wq); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + DBUG_ON(ch->reason == xpInterrupted); + } else if (ret == 0) { + ret = xpTimeout; + } else { + ret = xpInterrupted; + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * Once sent, this routine will not wait for the message to be received, nor + * will notification be given when it does happen. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. 
+ */ +enum xp_retval +xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, 0, NULL, NULL); + xpc_part_deref(part); + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * This routine will not wait for the message to be sent or received. + * + * Once the remote end of the channel has received the message, the function + * passed as an argument to xpc_initiate_send_notify() will be called. This + * allows the sender to free up or re-use any buffers referenced by the + * message, but does NOT mean the message has been processed at the remote + * end by a receiver. + * + * If this routine returns an error, the caller's function will NOT be called. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + * func - function to call with asynchronous notification of message + * receipt. THIS FUNCTION MUST BE NON-BLOCKING. + * key - user-defined key to be passed to the function when it's called. 
+ */ +enum xp_retval +xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + DBUG_ON(func == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, XPC_N_CALL, func, key); + xpc_part_deref(part); + } + return ret; +} + +/* + * Deliver a message's payload to its intended recipient. + */ +void +xpc_deliver_payload(struct xpc_channel *ch) +{ + void *payload; + + payload = xpc_arch_ops.get_deliverable_payload(ch); + if (payload != NULL) { + + /* + * This ref is taken to protect the payload itself from being + * freed before the user is finished with it, which the user + * indicates by calling xpc_initiate_received(). + */ + xpc_msgqueue_ref(ch); + + atomic_inc(&ch->kthreads_active); + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + + /* deliver the message to its intended recipient */ + ch->func(xpMsgReceived, ch->partid, ch->number, payload, + ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + } + + atomic_dec(&ch->kthreads_active); + } +} + +/* + * Acknowledge receipt of a delivered message's payload. + * + * This function, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload(). 
+ * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # message received on. + * payload - pointer to the payload area allocated via + * xpc_initiate_send() or xpc_initiate_send_notify(). + */ +void +xpc_initiate_received(short partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + ch = &part->channels[ch_number]; + xpc_arch_ops.received_payload(ch, payload); + + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ + xpc_msgqueue_deref(ch); +} diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c new file mode 100644 index 00000000000..832ed4c88cf --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -0,0 +1,1343 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) support - standard version. + * + * XPC provides a message passing capability that crosses partition + * boundaries. This module is made up of two parts: + * + * partition This part detects the presence/absence of other + * partitions. It provides a heartbeat and monitors + * the heartbeats of other partitions. + * + * channel This part manages the channels and sends/receives + * messages across them to/from other partitions. + * + * There are a couple of additional functions residing in XP, which + * provide an interface to XPC for its users. + * + * + * Caveats: + * + * . Currently on sn2, we have no way to determine which nasid an IRQ + * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write + * followed by an IPI. 
The amo indicates where data is to be pulled + * from, so after the IPI arrives, the remote partition checks the amo + * word. The IPI can actually arrive before the amo however, so other + * code must periodically check for this case. Also, remote amo + * operations do not reliably time out. Thus we do a remote PIO read + * solely to know whether the remote partition is down and whether we + * should stop sending IPIs to it. This remote PIO read operation is + * set up in a special nofault region so SAL knows to ignore (and + * cleanup) any errors due to the remote amo write, PIO read, and/or + * PIO write operations. + * + * If/when new hardware solves this IPI problem, we should abandon + * the current approach. + * + */ + +#include <linux/module.h> +#include <linux/sysctl.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/kdebug.h> +#include <linux/kthread.h> +#include "xpc.h" + +/* define two XPC debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpc_dbg_name = { + .name = "xpc" +}; + +struct device xpc_part_dbg_subname = { + .init_name = "", /* set to "part" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device xpc_chan_dbg_subname = { + .init_name = "", /* set to "chan" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device *xpc_part = &xpc_part_dbg_subname; +struct device *xpc_chan = &xpc_chan_dbg_subname; + +static int xpc_kdebug_ignore; + +/* systune related variables for /proc/sys directories */ + +static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; +static int xpc_hb_min_interval = 1; +static int xpc_hb_max_interval = 10; + +static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; +static int xpc_hb_check_min_interval = 10; +static int xpc_hb_check_max_interval = 120; + +int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; +static int xpc_disengage_min_timelimit; /* = 0 */ +static int xpc_disengage_max_timelimit = 120; + 
+static ctl_table xpc_sys_xpc_hb_dir[] = { + { + .procname = "hb_interval", + .data = &xpc_hb_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_min_interval, + .extra2 = &xpc_hb_max_interval}, + { + .procname = "hb_check_interval", + .data = &xpc_hb_check_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_check_min_interval, + .extra2 = &xpc_hb_check_max_interval}, + {} +}; +static ctl_table xpc_sys_xpc_dir[] = { + { + .procname = "hb", + .mode = 0555, + .child = xpc_sys_xpc_hb_dir}, + { + .procname = "disengage_timelimit", + .data = &xpc_disengage_timelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_disengage_min_timelimit, + .extra2 = &xpc_disengage_max_timelimit}, + {} +}; +static ctl_table xpc_sys_dir[] = { + { + .procname = "xpc", + .mode = 0555, + .child = xpc_sys_xpc_dir}, + {} +}; +static struct ctl_table_header *xpc_sysctl; + +/* non-zero if any remote partition disengage was timed out */ +int xpc_disengage_timedout; + +/* #of activate IRQs received and not yet processed */ +int xpc_activate_IRQ_rcvd; +DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock); + +/* IRQ handler notifies this wait queue on receipt of an IRQ */ +DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); + +static unsigned long xpc_hb_check_timeout; +static struct timer_list xpc_hb_timer; + +/* notification that the xpc_hb_checker thread has exited */ +static DECLARE_COMPLETION(xpc_hb_checker_exited); + +/* notification that the xpc_discovery thread has exited */ +static DECLARE_COMPLETION(xpc_discovery_exited); + +static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); + +static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_reboot_notifier = { + .notifier_call = xpc_system_reboot, +}; + +static int xpc_system_die(struct notifier_block *, unsigned 
long, void *); +static struct notifier_block xpc_die_notifier = { + .notifier_call = xpc_system_die, +}; + +struct xpc_arch_operations xpc_arch_ops; + +/* + * Timer function to enforce the timelimit on the partition disengage. + */ +static void +xpc_timeout_partition_disengage(unsigned long data) +{ + struct xpc_partition *part = (struct xpc_partition *)data; + + DBUG_ON(time_is_after_jiffies(part->disengage_timeout)); + + (void)xpc_partition_disengaged(part); + + DBUG_ON(part->disengage_timeout != 0); + DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); +} + +/* + * Timer to produce the heartbeat. The timer structures function is + * already set when this is initially called. A tunable is used to + * specify when the next timeout should occur. + */ +static void +xpc_hb_beater(unsigned long dummy) +{ + xpc_arch_ops.increment_heartbeat(); + + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); + add_timer(&xpc_hb_timer); +} + +static void +xpc_start_hb_beater(void) +{ + xpc_arch_ops.heartbeat_init(); + init_timer(&xpc_hb_timer); + xpc_hb_timer.function = xpc_hb_beater; + xpc_hb_beater(0); +} + +static void +xpc_stop_hb_beater(void) +{ + del_timer_sync(&xpc_hb_timer); + xpc_arch_ops.heartbeat_exit(); +} + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. 
+ */ +static void +xpc_check_remote_hb(void) +{ + struct xpc_partition *part; + short partid; + enum xp_retval ret; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + + if (xpc_exiting) + break; + + if (partid == xp_partition_id) + continue; + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_AS_INACTIVE || + part->act_state == XPC_P_AS_DEACTIVATING) { + continue; + } + + ret = xpc_arch_ops.get_remote_heartbeat(part); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(part, ret); + } +} + +/* + * This thread is responsible for nearly all of the partition + * activation/deactivation. + */ +static int +xpc_hb_checker(void *ignore) +{ + int force_IRQ = 0; + + /* this thread was marked active by xpc_hb_init() */ + + set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); + + /* set our heartbeating to other partitions into motion */ + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); + xpc_start_hb_beater(); + + while (!xpc_exiting) { + + dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " + "been received\n", + (int)(xpc_hb_check_timeout - jiffies), + xpc_activate_IRQ_rcvd); + + /* checking of remote heartbeats is skewed by IRQ handling */ + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) { + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + + dev_dbg(xpc_part, "checking remote heartbeats\n"); + xpc_check_remote_hb(); + + /* + * On sn2 we need to periodically recheck to ensure no + * IRQ/amo pairs have been missed. 
+ */ + if (is_shub()) + force_IRQ = 1; + } + + /* check for outstanding IRQs */ + if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) { + force_IRQ = 0; + dev_dbg(xpc_part, "processing activate IRQs " + "received\n"); + xpc_arch_ops.process_activate_IRQ_rcvd(); + } + + /* wait for IRQ or timeout */ + (void)wait_event_interruptible(xpc_activate_IRQ_wq, + (time_is_before_eq_jiffies( + xpc_hb_check_timeout) || + xpc_activate_IRQ_rcvd > 0 || + xpc_exiting)); + } + + xpc_stop_hb_beater(); + + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_hb_checker_exited); + return 0; +} + +/* + * This thread will attempt to discover other partitions to activate + * based on info provided by SAL. This new thread is short lived and + * will exit once discovery is complete. + */ +static int +xpc_initiate_discovery(void *ignore) +{ + xpc_discovery(); + + dev_dbg(xpc_part, "discovery thread is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_discovery_exited); + return 0; +} + +/* + * The first kthread assigned to a newly activated partition is the one + * created by XPC HB with which it calls xpc_activating(). XPC hangs on to + * that kthread until the partition is brought down, at which time that kthread + * returns back to XPC HB. (The return of that kthread will signify to XPC HB + * that XPC has dismantled all communication infrastructure for the associated + * partition.) This kthread becomes the channel manager for that partition. + * + * Each active partition has a channel manager, who, besides connecting and + * disconnecting channels, will ensure that each of the partition's connected + * channels has the required number of assigned kthreads to get the work done. 
+ */ +static void +xpc_channel_mgr(struct xpc_partition *part) +{ + while (part->act_state != XPC_P_AS_DEACTIVATING || + atomic_read(&part->nchannels_active) > 0 || + !xpc_partition_disengaged(part)) { + + xpc_process_sent_chctl_flags(part); + + /* + * Wait until we've been requested to activate kthreads or + * all of the channel's message queues have been torn down or + * a signal is pending. + * + * The channel_mgr_requests is set to 1 after being awakened, + * This is done to prevent the channel mgr from making one pass + * through the loop for each request, since he will + * be servicing all the requests in one pass. The reason it's + * set to 1 instead of 0 is so that other kthreads will know + * that the channel mgr is running and won't bother trying to + * wake him up. + */ + atomic_dec(&part->channel_mgr_requests); + (void)wait_event_interruptible(part->channel_mgr_wq, + (atomic_read(&part->channel_mgr_requests) > 0 || + part->chctl.all_flags != 0 || + (part->act_state == XPC_P_AS_DEACTIVATING && + atomic_read(&part->nchannels_active) == 0 && + xpc_partition_disengaged(part)))); + atomic_set(&part->channel_mgr_requests, 1); + } +} + +/* + * Guarantee that the kzalloc'd memory is cacheline aligned. + */ +void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Setup the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. 
+ */ +static enum xp_retval +xpc_setup_ch_structures(struct xpc_partition *part) +{ + enum xp_retval ret; + int ch_number; + struct xpc_channel *ch; + short partid = XPC_PARTID(part); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ + DBUG_ON(part->channels != NULL); + part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS, + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpNoMemory; + } + + /* allocate the remote open and close args */ + + part->remote_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part-> + remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + ret = xpNoMemory; + goto out_1; + } + + part->chctl.all_flags = 0; + spin_lock_init(&part->chctl_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + part->nchannels = XPC_MAX_NCHANNELS; + + atomic_set(&part->nchannels_active, 0); + atomic_set(&part->nchannels_engaged, 0); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + init_completion(&ch->wdisconnect_wait); + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } + + ret = xpc_arch_ops.setup_ch_structures(part); + if (ret != xpSuccess) + goto out_2; + + /* + * With the setting of the partition setup_state to XPC_P_SS_SETUP, + * we're declaring that this partition is ready to go. 
+ */ + part->setup_state = XPC_P_SS_SETUP; + + return xpSuccess; + + /* setup of ch structures failed */ +out_2: + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; +out_1: + kfree(part->channels); + part->channels = NULL; + return ret; +} + +/* + * Teardown the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static void +xpc_teardown_ch_structures(struct xpc_partition *part) +{ + DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); + DBUG_ON(atomic_read(&part->nchannels_active) != 0); + + /* + * Make this partition inaccessible to local processes by marking it + * as no longer setup. Then wait before proceeding with the teardown + * until all existing references cease. + */ + DBUG_ON(part->setup_state != XPC_P_SS_SETUP); + part->setup_state = XPC_P_SS_WTEARDOWN; + + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); + + /* now we can begin tearing down the infrastructure */ + + xpc_arch_ops.teardown_ch_structures(part); + + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->channels); + part->channels = NULL; + + part->setup_state = XPC_P_SS_TORNDOWN; +} + +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition becoming the channel + * manager for that partition until the partition is deactivating, at which + * time the kthread will teardown the XPC infrastructure and then exit. 
+ */ +static int +xpc_activating(void *__partid) +{ + short partid = (u64)__partid; + struct xpc_partition *part = &xpc_partitions[partid]; + unsigned long irq_flags; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + /* indicate the thread is activating */ + DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ); + part->act_state = XPC_P_AS_ACTIVATING; + + XPC_SET_REASON(part, 0, 0); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + dev_dbg(xpc_part, "activating partition %d\n", partid); + + xpc_arch_ops.allow_hb(partid); + + if (xpc_setup_ch_structures(part) == xpSuccess) { + (void)xpc_part_ref(part); /* this will always succeed */ + + if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { + xpc_mark_partition_active(part); + xpc_channel_mgr(part); + /* won't return until partition is deactivating */ + } + + xpc_part_deref(part); + xpc_teardown_ch_structures(part); + } + + xpc_arch_ops.disallow_hb(partid); + xpc_mark_partition_inactive(part); + + if (part->reason == xpReactivating) { + /* interrupting ourselves results in activating partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + + return 0; +} + +void +xpc_activate_partition(struct xpc_partition *part) +{ + short partid = XPC_PARTID(part); + unsigned long irq_flags; + struct task_struct *kthread; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + DBUG_ON(part->act_state != XPC_P_AS_INACTIVE); + + part->act_state = XPC_P_AS_ACTIVATION_REQ; + XPC_SET_REASON(part, xpCloneKThread, __LINE__); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d", + partid); + if (IS_ERR(kthread)) { + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = 
XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + } +} + +void +xpc_activate_kthreads(struct xpc_channel *ch, int needed) +{ + int idle = atomic_read(&ch->kthreads_idle); + int assigned = atomic_read(&ch->kthreads_assigned); + int wakeup; + + DBUG_ON(needed <= 0); + + if (idle > 0) { + wakeup = (needed > idle) ? idle : needed; + needed -= wakeup; + + dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " + "channel=%d\n", wakeup, ch->partid, ch->number); + + /* only wakeup the requested number of kthreads */ + wake_up_nr(&ch->idle_wq, wakeup); + } + + if (needed <= 0) + return; + + if (needed + assigned > ch->kthreads_assigned_limit) { + needed = ch->kthreads_assigned_limit - assigned; + if (needed <= 0) + return; + } + + dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", + needed, ch->partid, ch->number); + + xpc_create_kthreads(ch, needed, 0); +} + +/* + * This function is where XPC's kthreads wait for messages to deliver. 
+ */ +static void +xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) +{ + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + do { + /* deliver messages to their intended recipients */ + + while (n_of_deliverable_payloads(ch) > 0 && + !(ch->flags & XPC_C_DISCONNECTING)) { + xpc_deliver_payload(ch); + } + + if (atomic_inc_return(&ch->kthreads_idle) > + ch->kthreads_idle_limit) { + /* too many idle kthreads on this channel */ + atomic_dec(&ch->kthreads_idle); + break; + } + + dev_dbg(xpc_chan, "idle kthread calling " + "wait_event_interruptible_exclusive()\n"); + + (void)wait_event_interruptible_exclusive(ch->idle_wq, + (n_of_deliverable_payloads(ch) > 0 || + (ch->flags & XPC_C_DISCONNECTING))); + + atomic_dec(&ch->kthreads_idle); + + } while (!(ch->flags & XPC_C_DISCONNECTING)); +} + +static int +xpc_kthread_start(void *args) +{ + short partid = XPC_UNPACK_ARG1(args); + u16 ch_number = XPC_UNPACK_ARG2(args); + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + int n_needed; + unsigned long irq_flags; + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", + partid, ch_number); + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + + /* let registerer know that connection has been established */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_connected_callout(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + /* + * It is possible that while the callout was being + * made that the remote partition sent some messages. 
+ * If that is the case, we may need to activate + * additional kthreads to help deliver them. We only + * need one less than total #of messages to deliver. + */ + n_needed = n_of_deliverable_payloads(ch) - 1; + if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) + xpc_activate_kthreads(ch, n_needed); + + } else { + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + + xpc_kthread_waitmsgs(part, ch); + } + + /* let registerer know that connection is disconnecting */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_disconnect_callout(ch, xpDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_arch_ops.indicate_partition_disengaged(part); + } + + xpc_msgqueue_deref(ch); + + dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", + partid, ch_number); + + xpc_part_deref(part); + return 0; +} + +/* + * For each partition that XPC has established communications with, there is + * a minimum of one kernel thread assigned to perform any operation that + * may potentially sleep or block (basically the callouts to the asynchronous + * functions registered via xpc_connect()). + * + * Additional kthreads are created and destroyed by XPC as the workload + * demands. + * + * A kthread is assigned to one of the active channels that exists for a given + * partition. 
+ */ +void +xpc_create_kthreads(struct xpc_channel *ch, int needed, + int ignore_disconnecting) +{ + unsigned long irq_flags; + u64 args = XPC_PACK_ARGS(ch->partid, ch->number); + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct task_struct *kthread; + void (*indicate_partition_disengaged) (struct xpc_partition *) = + xpc_arch_ops.indicate_partition_disengaged; + + while (needed-- > 0) { + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + if (ignore_disconnecting) { + if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { + /* kthreads assigned had gone to zero */ + BUG_ON(!(ch->flags & + XPC_C_DISCONNECTINGCALLOUT_MADE)); + break; + } + + } else if (ch->flags & XPC_C_DISCONNECTING) { + break; + + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_arch_ops.indicate_partition_engaged(part); + } + (void)xpc_part_ref(part); + xpc_msgqueue_ref(ch); + + kthread = kthread_run(xpc_kthread_start, (void *)args, + "xpc%02dc%d", ch->partid, ch->number); + if (IS_ERR(kthread)) { + /* the fork failed */ + + /* + * NOTE: if (ignore_disconnecting && + * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, + * then we'll deadlock if all other kthreads assigned + * to this channel are blocked in the channel's + * registerer, because the only thing that will unblock + * them is the xpDisconnecting callout that this + * failed kthread_run() would have made. + */ + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + indicate_partition_disengaged(part); + } + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + + if (atomic_read(&ch->kthreads_assigned) < + ch->kthreads_idle_limit) { + /* + * Flag this as an error only if we have an + * insufficient #of kthreads for the channel + * to function. 
+ */ + spin_lock_irqsave(&ch->lock, irq_flags); + XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + break; + } + } +} + +void +xpc_disconnect_wait(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + int wakeup_channel_mgr; + + /* now wait for all callouts to the caller's function to cease */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (!xpc_part_ref(part)) + continue; + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_WDISCONNECT)) { + xpc_part_deref(part); + continue; + } + + wait_for_completion(&ch->wdisconnect_wait); + + spin_lock_irqsave(&ch->lock, irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + wakeup_channel_mgr = 0; + + if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + wakeup_channel_mgr = 1; + } + ch->delayed_chctl_flags = 0; + } + + ch->flags &= ~XPC_C_WDISCONNECT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (wakeup_channel_mgr) + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); + } +} + +static int +xpc_setup_partitions(void) +{ + short partid; + struct xpc_partition *part; + + xpc_partitions = kzalloc(sizeof(struct xpc_partition) * + xp_max_npartitions, GFP_KERNEL); + if (xpc_partitions == NULL) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + return -ENOMEM; + } + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. 
+ */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); + + part->activate_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, 0, 0); + + init_timer(&part->disengage_timer); + part->disengage_timer.function = + xpc_timeout_partition_disengage; + part->disengage_timer.data = (unsigned long)part; + + part->setup_state = XPC_P_SS_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + return xpc_arch_ops.setup_partitions(); +} + +static void +xpc_teardown_partitions(void) +{ + xpc_arch_ops.teardown_partitions(); + kfree(xpc_partitions); +} + +static void +xpc_do_exit(enum xp_retval reason) +{ + short partid; + int active_part_count, printed_waiting_msg = 0; + struct xpc_partition *part; + unsigned long printmsg_time, disengage_timeout = 0; + + /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ + DBUG_ON(xpc_exiting == 1); + + /* + * Let the heartbeat checker thread and the discovery thread + * (if one is running) know that they should exit. Also wake up + * the heartbeat checker thread in case it's sleeping. 
+ */ + xpc_exiting = 1; + wake_up_interruptible(&xpc_activate_IRQ_wq); + + /* wait for the discovery thread to exit */ + wait_for_completion(&xpc_discovery_exited); + + /* wait for the heartbeat checker thread to exit */ + wait_for_completion(&xpc_hb_checker_exited); + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + /* wait for all partitions to become inactive */ + + printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_timedout = 0; + + do { + active_part_count = 0; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + continue; + } + + active_part_count++; + + XPC_DEACTIVATE_PARTITION(part, reason); + + if (part->disengage_timeout > disengage_timeout) + disengage_timeout = part->disengage_timeout; + } + + if (xpc_arch_ops.any_partition_engaged()) { + if (time_is_before_jiffies(printmsg_time)) { + dev_info(xpc_part, "waiting for remote " + "partitions to deactivate, timeout in " + "%ld seconds\n", (disengage_timeout - + jiffies) / HZ); + printmsg_time = jiffies + + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + printed_waiting_msg = 1; + } + + } else if (active_part_count > 0) { + if (printed_waiting_msg) { + dev_info(xpc_part, "waiting for local partition" + " to deactivate\n"); + printed_waiting_msg = 0; + } + + } else { + if (!xpc_disengage_timedout) { + dev_info(xpc_part, "all partitions have " + "deactivated\n"); + } + break; + } + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + } while (1); + + DBUG_ON(xpc_arch_ops.any_partition_engaged()); + + xpc_teardown_rsvd_page(); + + if (reason == xpUnloading) { + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); + } + + /* clear the interface to XPC's functions */ + xpc_clear_interface(); + + if (xpc_sysctl) + 
unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); + + if (is_shub()) + xpc_exit_sn2(); + else if (is_uv()) + xpc_exit_uv(); +} + +/* + * This function is called when the system is being rebooted. + */ +static int +xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) +{ + enum xp_retval reason; + + switch (event) { + case SYS_RESTART: + reason = xpSystemReboot; + break; + case SYS_HALT: + reason = xpSystemHalt; + break; + case SYS_POWER_OFF: + reason = xpSystemPoweroff; + break; + default: + reason = xpSystemGoingDown; + } + + xpc_do_exit(reason); + return NOTIFY_DONE; +} + +/* + * Notify other partitions to deactivate from us by first disengaging from all + * references to our memory. + */ +static void +xpc_die_deactivate(void) +{ + struct xpc_partition *part; + short partid; + int any_engaged; + long keep_waiting; + long wait_to_print; + + /* keep xpc_hb_checker thread from doing anything (just in case) */ + xpc_exiting = 1; + + xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_arch_ops.partition_engaged(partid) || + part->act_state != XPC_P_AS_INACTIVE) { + xpc_arch_ops.request_partition_deactivation(part); + xpc_arch_ops.indicate_partition_disengaged(part); + } + } + + /* + * Though we requested that all other partitions deactivate from us, + * we only wait until they've all disengaged or we've reached the + * defined timelimit. + * + * Given that one iteration through the following while-loop takes + * approximately 200 microseconds, calculate the #of loops to take + * before bailing and the #of loops before printing a waiting message. 
+ */ + keep_waiting = xpc_disengage_timelimit * 1000 * 5; + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; + + while (1) { + any_engaged = xpc_arch_ops.any_partition_engaged(); + if (!any_engaged) { + dev_info(xpc_part, "all partitions have deactivated\n"); + break; + } + + if (!keep_waiting--) { + for (partid = 0; partid < xp_max_npartitions; + partid++) { + if (xpc_arch_ops.partition_engaged(partid)) { + dev_info(xpc_part, "deactivate from " + "remote partition %d timed " + "out\n", partid); + } + } + break; + } + + if (!wait_to_print--) { + dev_info(xpc_part, "waiting for remote partitions to " + "deactivate, timeout in %ld seconds\n", + keep_waiting / (1000 * 5)); + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * + 1000 * 5; + } + + udelay(200); + } +} + +/* + * This function is called when the system is being restarted or halted due + * to some sort of system failure. If this is the case we need to notify the + * other partitions to disengage from all references to our memory. + * This function can also be called when our heartbeater could be offlined + * for a time. In this case we need to notify other partitions to not worry + * about the lack of a heartbeat. + */ +static int +xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) +{ +#ifdef CONFIG_IA64 /* !!! temporary kludge */ + switch (event) { + case DIE_MACHINE_RESTART: + case DIE_MACHINE_HALT: + xpc_die_deactivate(); + break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + /* fall through */ + case DIE_MCA_MONARCH_ENTER: + case DIE_INIT_MONARCH_ENTER: + xpc_arch_ops.offline_heartbeat(); + break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? 
*/ + if (!xpc_kdebug_ignore) + break; + + /* fall through */ + case DIE_MCA_MONARCH_LEAVE: + case DIE_INIT_MONARCH_LEAVE: + xpc_arch_ops.online_heartbeat(); + break; + } +#else + xpc_die_deactivate(); +#endif + + return NOTIFY_DONE; +} + +int __init +xpc_init(void) +{ + int ret; + struct task_struct *kthread; + + dev_set_name(xpc_part, "part"); + dev_set_name(xpc_chan, "chan"); + + if (is_shub()) { + /* + * The ia64-sn2 architecture supports at most 64 partitions. + * And the inability to unregister remote amos restricts us + * further to only support exactly 64 partitions on this + * architecture, no less. + */ + if (xp_max_npartitions != 64) { + dev_err(xpc_part, "max #of partitions not set to 64\n"); + ret = -EINVAL; + } else { + ret = xpc_init_sn2(); + } + + } else if (is_uv()) { + ret = xpc_init_uv(); + + } else { + ret = -ENODEV; + } + + if (ret != 0) + return ret; + + ret = xpc_setup_partitions(); + if (ret != 0) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + goto out_1; + } + + xpc_sysctl = register_sysctl_table(xpc_sys_dir); + + /* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ + ret = xpc_setup_rsvd_page(); + if (ret != 0) { + dev_err(xpc_part, "can't setup our reserved page\n"); + goto out_2; + } + + /* add ourselves to the reboot_notifier_list */ + ret = register_reboot_notifier(&xpc_reboot_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register reboot notifier\n"); + + /* add ourselves to the die_notifier list */ + ret = register_die_notifier(&xpc_die_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register die notifier\n"); + + /* + * The real work-horse behind xpc. This processes incoming + * interrupts and monitors remote heartbeats. 
+ */ + kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking hb check thread\n"); + ret = -EBUSY; + goto out_3; + } + + /* + * Startup a thread that will attempt to discover other partitions to + * activate based on info provided by SAL. This new thread is short + * lived and will exit once discovery is complete. + */ + kthread = kthread_run(xpc_initiate_discovery, NULL, + XPC_DISCOVERY_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking discovery thread\n"); + + /* mark this new thread as a non-starter */ + complete(&xpc_discovery_exited); + + xpc_do_exit(xpUnloading); + return -EBUSY; + } + + /* set the interface to point at XPC's functions */ + xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, + xpc_initiate_send, xpc_initiate_send_notify, + xpc_initiate_received, xpc_initiate_partid_to_nasids); + + return 0; + + /* initialization was not successful */ +out_3: + xpc_teardown_rsvd_page(); + + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); +out_2: + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); +out_1: + if (is_shub()) + xpc_exit_sn2(); + else if (is_uv()) + xpc_exit_uv(); + return ret; +} + +module_init(xpc_init); + +void __exit +xpc_exit(void) +{ + xpc_do_exit(xpUnloading); +} + +module_exit(xpc_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); +MODULE_LICENSE("GPL"); + +module_param(xpc_hb_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " + "heartbeat increments."); + +module_param(xpc_hb_check_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " + "heartbeat checks."); + +module_param(xpc_disengage_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait " + "for 
disengage to complete."); + +module_param(xpc_kdebug_ignore, int, 0); +MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " + "other partitions when dropping into kdebug."); diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c new file mode 100644 index 00000000000..9a6268c89fd --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -0,0 +1,535 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) partition support. + * + * This is the part of XPC that detects the presence/absence of + * other partitions. It provides a heartbeat and monitors the + * heartbeats of other partitions. + * + */ + +#include <linux/device.h> +#include <linux/hardirq.h> +#include "xpc.h" +#include <asm/uv/uv_hub.h> + +/* XPC is exiting flag */ +int xpc_exiting; + +/* this partition's reserved page pointers */ +struct xpc_rsvd_page *xpc_rsvd_page; +static unsigned long *xpc_part_nasids; +unsigned long *xpc_mach_nasids; + +static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ +int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ + +struct xpc_partition *xpc_partitions; + +/* + * Guarantee that the kmalloc'd memory is cacheline aligned. 
+ */ +void * +xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kmalloc will give us cachline aligned memory by default */ + *base = kmalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kmalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Given a nasid, get the physical address of the partition's reserved page + * for that nasid. This function returns 0 on any error. + */ +static unsigned long +xpc_get_rsvd_page_pa(int nasid) +{ + enum xp_retval ret; + u64 cookie = 0; + unsigned long rp_pa = nasid; /* seed with nasid */ + size_t len = 0; + size_t buf_len = 0; + void *buf = buf; + void *buf_base = NULL; + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *) = + xpc_arch_ops.get_partition_rsvd_page_pa; + + while (1) { + + /* !!! rp_pa will need to be _gpa on UV. + * ??? So do we save it into the architecture specific parts + * ??? of the xpc_partition structure? Do we rename this + * ??? function or have two versions? Rename rp_pa for UV to + * ??? rp_gpa? + */ + ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); + + dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " + "address=0x%016lx, len=0x%016lx\n", ret, + (unsigned long)cookie, rp_pa, len); + + if (ret != xpNeedMoreInfo) + break; + + /* !!! 
L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ + if (is_shub()) + len = L1_CACHE_ALIGN(len); + + if (len > buf_len) { + if (buf_base != NULL) + kfree(buf_base); + buf_len = L1_CACHE_ALIGN(len); + buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, + &buf_base); + if (buf_base == NULL) { + dev_err(xpc_part, "unable to kmalloc " + "len=0x%016lx\n", buf_len); + ret = xpNoMemory; + break; + } + } + + ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); + break; + } + } + + kfree(buf_base); + + if (ret != xpSuccess) + rp_pa = 0; + + dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); + return rp_pa; +} + +/* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ +int +xpc_setup_rsvd_page(void) +{ + int ret; + struct xpc_rsvd_page *rp; + unsigned long rp_pa; + unsigned long new_ts_jiffies; + + /* get the local reserved page's address */ + + preempt_disable(); + rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); + preempt_enable(); + if (rp_pa == 0) { + dev_err(xpc_part, "SAL failed to locate the reserved page\n"); + return -ESRCH; + } + rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa)); + + if (rp->SAL_version < 3) { + /* SAL_versions < 3 had a SAL_partid defined as a u8 */ + rp->SAL_partid &= 0xff; + } + BUG_ON(rp->SAL_partid != xp_partition_id); + + if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { + dev_err(xpc_part, "the reserved page's partid of %d is outside " + "supported range (< 0 || >= %d)\n", rp->SAL_partid, + xp_max_npartitions); + return -EINVAL; + } + + rp->version = XPC_RP_VERSION; + rp->max_npartitions = xp_max_npartitions; + + /* establish the actual sizes of the nasid masks */ + if (rp->SAL_version == 1) { + /* SAL_version 1 didn't set the nasids_size field */ + rp->SAL_nasids_size = 128; + } + 
xpc_nasid_mask_nbytes = rp->SAL_nasids_size; + xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * + BITS_PER_BYTE); + + /* setup the pointers to the various items in the reserved page */ + xpc_part_nasids = XPC_RP_PART_NASIDS(rp); + xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); + + ret = xpc_arch_ops.setup_rsvd_page(rp); + if (ret != 0) + return ret; + + /* + * Set timestamp of when reserved page was setup by XPC. + * This signifies to the remote partition that our reserved + * page is initialized. + */ + new_ts_jiffies = jiffies; + if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) + new_ts_jiffies++; + rp->ts_jiffies = new_ts_jiffies; + + xpc_rsvd_page = rp; + return 0; +} + +void +xpc_teardown_rsvd_page(void) +{ + /* a zero timestamp indicates our rsvd page is not initialized */ + xpc_rsvd_page->ts_jiffies = 0; +} + +/* + * Get a copy of a portion of the remote partition's rsvd page. + * + * remote_rp points to a buffer that is cacheline aligned for BTE copies and + * is large enough to contain a copy of their reserved page header and + * part_nasids mask. 
+ */
+enum xp_retval
+xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
+		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
+{
+	unsigned long rp_pa;
+	enum xp_retval status;
+	int word;
+
+	/* locate the remote reserved page; 0 means the lookup failed */
+	rp_pa = xpc_get_rsvd_page_pa(nasid);
+	*remote_rp_pa = rp_pa;
+	if (rp_pa == 0)
+		return xpNoRsvdPageAddr;
+
+	/* copy the header plus the part_nasids mask into the local buffer */
+	status = xp_remote_memcpy(xp_pa(remote_rp), rp_pa,
+				  XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
+	if (status != xpSuccess)
+		return status;
+
+	/* fold the remote partition's nasids into the caller's bitmap */
+	if (discovered_nasids != NULL) {
+		unsigned long *their_nasids = XPC_RP_PART_NASIDS(remote_rp);
+
+		for (word = 0; word < xpc_nasid_mask_nlongs; word++)
+			discovered_nasids[word] |= their_nasids[word];
+	}
+
+	/* a zero timestamp means the page has not been set up yet */
+	if (remote_rp->ts_jiffies == 0)
+		return xpRsvdPageNotSet;
+
+	/* only the major version number has to agree */
+	if (XPC_VERSION_MAJOR(remote_rp->version) !=
+	    XPC_VERSION_MAJOR(XPC_RP_VERSION))
+		return xpBadVersion;
+
+	/* their partid has to be valid on our side ... */
+	if (remote_rp->SAL_partid < 0 ||
+	    remote_rp->SAL_partid >= xp_max_npartitions)
+		return xpInvalidPartid;
+
+	/* ... and our partid has to be valid on theirs */
+	if (remote_rp->max_npartitions <= xp_partition_id)
+		return xpInvalidPartid;
+
+	/* the page we pulled over turned out to be our own */
+	if (remote_rp->SAL_partid == xp_partition_id)
+		return xpLocalPartid;
+
+	return xpSuccess;
+}
+
+/*
+ * See if the other side has responded to a partition deactivate request
+ * from us. Though we requested the remote partition to deactivate with regard
+ * to us, we really only need to wait for the other side to disengage from us.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+	short partid = XPC_PARTID(part);
+	int disengaged;
+
+	disengaged = !xpc_arch_ops.partition_engaged(partid);
+	if (part->disengage_timeout) {	/* a disengage timelimit is set */
+		if (!disengaged) {
+			if (time_is_after_jiffies(part->disengage_timeout)) {
+				/* timelimit hasn't been reached yet */
+				return 0;
+			}
+
+			/*
+			 * Other side hasn't responded to our deactivate
+			 * request in a timely fashion, so assume it's dead.
+			 * xpc_disengage_timedout is set so that other code
+			 * can tell a timeout occurred.
+			 */
+
+			dev_info(xpc_part, "deactivate request to remote "
+				 "partition %d timed out\n", partid);
+			xpc_disengage_timedout = 1;
+			xpc_arch_ops.assume_partition_disengaged(partid);
+			disengaged = 1;
+		}
+		part->disengage_timeout = 0;	/* timelimit no longer set */
+
+		/*
+		 * Cancel the timer function, provided it's not us. The
+		 * cancel is skipped when running in interrupt context.
+		 */
+		if (!in_interrupt())
+			del_singleshot_timer_sync(&part->disengage_timer);
+
+		DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
+			part->act_state != XPC_P_AS_INACTIVE);
+		if (part->act_state != XPC_P_AS_INACTIVE)
+			xpc_wakeup_channel_mgr(part);
+
+		xpc_arch_ops.cancel_partition_deactivation_request(part);
+	}
+	return disengaged;	/* nonzero once the remote side is disengaged */
+}
+
+/*
+ * Mark specified partition as active.
+ *
+ * The transition is made under part->act_lock and only from the ACTIVATING
+ * state. Returns xpSuccess when the state was changed; otherwise the state
+ * is left alone and part->reason is returned.
+ */
+enum xp_retval
+xpc_mark_partition_active(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	enum xp_retval ret;
+
+	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+	if (part->act_state == XPC_P_AS_ACTIVATING) {
+		part->act_state = XPC_P_AS_ACTIVE;
+		ret = xpSuccess;
+	} else {
+		DBUG_ON(part->reason == xpSuccess);
+		ret = part->reason;
+	}
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	return ret;
+}
+
+/*
+ * Start the process of deactivating the specified partition.
+ */
+void
+xpc_deactivate_partition(const int line, struct xpc_partition *part,
+			 enum xp_retval reason)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	if (part->act_state == XPC_P_AS_INACTIVE) {	/* nothing to bring down */
+		XPC_SET_REASON(part, reason, line);
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		if (reason == xpReactivating) {
+			/* we interrupt ourselves to reactivate partition */
+			xpc_arch_ops.request_partition_reactivation(part);
+		}
+		return;
+	}
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {	/* already under way */
+		/*
+		 * The recorded reason is only replaced when it was
+		 * xpUnloading and the new one is not, or when the new reason
+		 * is xpReactivating.
+		 */
+		if ((part->reason == xpUnloading && reason != xpUnloading) ||
+		    reason == xpReactivating) {
+			XPC_SET_REASON(part, reason, line);
+		}
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		return;
+	}
+
+	part->act_state = XPC_P_AS_DEACTIVATING;
+	XPC_SET_REASON(part, reason, line);
+
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	/* ask remote partition to deactivate with regard to us */
+	xpc_arch_ops.request_partition_deactivation(part);
+
+	/*
+	 * Set a timelimit on the disengage phase of the deactivation request.
+	 * The timelimit is xpc_disengage_timelimit seconds from now and the
+	 * disengage timer is armed to expire at that same time; both are
+	 * done after act_lock has been released.
+	 */
+	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
+	part->disengage_timer.expires = part->disengage_timeout;
+	add_timer(&part->disengage_timer);
+
+	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+		XPC_PARTID(part), reason);
+
+	xpc_partition_going_down(part, reason);
+}
+
+/*
+ * Mark specified partition as inactive.
+ *
+ * act_state is changed while holding part->act_lock; remote_rp_pa is then
+ * zeroed after the lock has been dropped.
+ */
+void
+xpc_mark_partition_inactive(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+
+	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
+		XPC_PARTID(part));
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+	part->act_state = XPC_P_AS_INACTIVE;
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+	part->remote_rp_pa = 0;
+}
+
+/*
+ * SAL has provided a partition and machine mask. The partition mask
+ * contains a bit for each even nasid in our partition. The machine
+ * mask contains a bit for each even nasid in the entire machine.
+ * + * Using those two bit arrays, we can determine which nasids are + * known in the machine. Each should also have a reserved page + * initialized if they are available for partitioning. + */ +void +xpc_discovery(void) +{ + void *remote_rp_base; + struct xpc_rsvd_page *remote_rp; + unsigned long remote_rp_pa; + int region; + int region_size; + int max_regions; + int nasid; + struct xpc_rsvd_page *rp; + unsigned long *discovered_nasids; + enum xp_retval ret; + + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + + xpc_nasid_mask_nbytes, + GFP_KERNEL, &remote_rp_base); + if (remote_rp == NULL) + return; + + discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs, + GFP_KERNEL); + if (discovered_nasids == NULL) { + kfree(remote_rp_base); + return; + } + + rp = (struct xpc_rsvd_page *)xpc_rsvd_page; + + /* + * The term 'region' in this context refers to the minimum number of + * nodes that can comprise an access protection grouping. The access + * protection is in regards to memory, IOI and IPI. 
+ */ + max_regions = 64; + region_size = xp_region_size; + + switch (region_size) { + case 128: + max_regions *= 2; + case 64: + max_regions *= 2; + case 32: + max_regions *= 2; + region_size = 16; + DBUG_ON(!is_shub2()); + } + + for (region = 0; region < max_regions; region++) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "searching region %d\n", region); + + for (nasid = (region * region_size * 2); + nasid < ((region + 1) * region_size * 2); nasid += 2) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "checking nasid %d\n", nasid); + + if (test_bit(nasid / 2, xpc_part_nasids)) { + dev_dbg(xpc_part, "PROM indicates Nasid %d is " + "part of the local partition; skipping " + "region\n", nasid); + break; + } + + if (!(test_bit(nasid / 2, xpc_mach_nasids))) { + dev_dbg(xpc_part, "PROM indicates Nasid %d was " + "not on Numa-Link network at reset\n", + nasid); + continue; + } + + if (test_bit(nasid / 2, discovered_nasids)) { + dev_dbg(xpc_part, "Nasid %d is part of a " + "partition which was previously " + "discovered\n", nasid); + continue; + } + + /* pull over the rsvd page header & part_nasids mask */ + + ret = xpc_get_remote_rp(nasid, discovered_nasids, + remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "unable to get reserved page " + "from nasid %d, reason=%d\n", nasid, + ret); + + if (ret == xpLocalPartid) + break; + + continue; + } + + xpc_arch_ops.request_partition_activation(remote_rp, + remote_rp_pa, nasid); + } + } + + kfree(discovered_nasids); + kfree(remote_rp_base); +} + +/* + * Given a partid, get the nasids owned by that partition from the + * remote partition's reserved page. 
+ */ +enum xp_retval +xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) +{ + struct xpc_partition *part; + unsigned long part_nasid_pa; + + part = &xpc_partitions[partid]; + if (part->remote_rp_pa == 0) + return xpPartitionDown; + + memset(nasid_mask, 0, xpc_nasid_mask_nbytes); + + part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); + + return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, + xpc_nasid_mask_nbytes); +} diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c new file mode 100644 index 00000000000..8b70e03f939 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -0,0 +1,2461 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) sn2-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/delay.h> +#include <asm/uncached.h> +#include <asm/sn/mspec.h> +#include <asm/sn/sn_sal.h> +#include "xpc.h" + +/* + * Define the number of u64s required to represent all the C-brick nasids + * as a bitmap. The cross-partition kernel modules deal only with + * C-brick nasids, thus the need for bitmaps which don't account for + * odd-numbered (non C-brick) nasids. + */ +#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2) +#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8) +#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64) + +/* + * Memory for XPC's amo variables is allocated by the MSPEC driver. These + * pages are located in the lowest granule. The lowest granule uses 4k pages + * for cached references and an alternate TLB handler to never provide a + * cacheable mapping for the entire region. 
This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC and 1 amo variable to request partition deactivation.
+ *
+ * The macros below are the starting index of each of those groups; every
+ * group begins where the previous one ends.
+ */
+#define XPC_NOTIFY_IRQ_AMOS_SN2		0
+#define XPC_ACTIVATE_IRQ_AMOS_SN2	(XPC_NOTIFY_IRQ_AMOS_SN2 + \
+					 XP_MAX_NPARTITIONS_SN2)
+#define XPC_ENGAGED_PARTITIONS_AMO_SN2	(XPC_ACTIVATE_IRQ_AMOS_SN2 + \
+					 XP_NASID_MASK_WORDS_SN2)
+#define XPC_DEACTIVATE_REQUEST_AMO_SN2	(XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
+
+/*
+ * Buffer used to store a local copy of portions of a remote partition's
+ * reserved page (either its header and part_nasids mask, or its vars).
+ */
+static void *xpc_remote_copy_buffer_base_sn2;
+static char *xpc_remote_copy_buffer_sn2;
+
+static struct xpc_vars_sn2 *xpc_vars_sn2;
+static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
+
+static int
+xpc_setup_partitions_sn2(void)
+{
+	/* nothing needs to be done */
+	return 0;
+}
+
+static void
+xpc_teardown_partitions_sn2(void)
+{
+	/* nothing needs to be done */
+}
+
+/*
+ * SH_IPI_ACCESS shub register value on startup. These are filled in by
+ * xpc_allow_IPI_ops_sn2() and written back by xpc_disallow_IPI_ops_sn2();
+ * the single sh1 value is used on non-shub2 hubs, the four sh2 values on
+ * shub2.
+ */
+static u64 xpc_sh1_IPI_access_sn2;
+static u64 xpc_sh2_IPI_access0_sn2;
+static u64 xpc_sh2_IPI_access1_sn2;
+static u64 xpc_sh2_IPI_access2_sn2;
+static u64 xpc_sh2_IPI_access3_sn2;
+
+/*
+ * Change protections to allow IPI operations.
+ *
+ * The current IPI access register value(s) are first read from the local
+ * hub and saved, then -1UL is written to the same register(s) on every
+ * online node.
+ */
+static void
+xpc_allow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* !!! The following should get moved into SAL. */
+	if (is_shub2()) {
+		xpc_sh2_IPI_access0_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+		xpc_sh2_IPI_access1_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+		xpc_sh2_IPI_access2_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+		xpc_sh2_IPI_access3_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      -1UL);
+		}
+	} else {
+		xpc_sh1_IPI_access_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      -1UL);
+		}
+	}
+}
+
+/*
+ * Restrict protections to disallow IPI operations.
+ *
+ * Writes the register value(s) saved by xpc_allow_IPI_ops_sn2() back to
+ * every online node.
+ */
+static void
+xpc_disallow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* !!! The following should get moved into SAL. */
+	if (is_shub2()) {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      xpc_sh2_IPI_access0_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      xpc_sh2_IPI_access1_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      xpc_sh2_IPI_access2_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      xpc_sh2_IPI_access3_sn2);
+		}
+	} else {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      xpc_sh1_IPI_access_sn2);
+		}
+	}
+}
+
+/*
+ * The following set of functions are used for the sending and receiving of
+ * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
+ * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
+ * is associated with channel activity (SGI_XPC_NOTIFY).
+ */
+
+static u64
+xpc_receive_IRQ_amo_sn2(struct amo *amo)
+{
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);	/* fetch the amo's value using the CLEAR op */
+}
+
+static enum xp_retval
+xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
+		 int vector)
+{
+	int ret = 0;
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);	/* IRQs stay off until the PIO read is done */
+
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);	/* OR flag into the amo */
+	sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 *
+	 * The read targets the node that holds the amo; a nonzero result is
+	 * reported to the caller as xpPioReadError.
+	 */
+	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	return (ret == 0) ? xpSuccess : xpPioReadError;
+}
+
+static struct amo *
+xpc_init_IRQ_amo_sn2(int index)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page + index;	/* index'th amo of our amos page */
+
+	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear amo variable */
+	return amo;
+}
+
+/*
+ * Functions associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Notify the heartbeat check thread that an activate IRQ has been received.
+ *
+ * xpc_activate_IRQ_rcvd is incremented under xpc_activate_IRQ_rcvd_lock and
+ * sleepers on xpc_activate_IRQ_wq are woken. Neither irq nor dev_id is used.
+ */
+static irqreturn_t
+xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Flag the appropriate amo variable and send an IRQ to the specified node.
+ */
+static void
+xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
+			  int to_nasid, int to_phys_cpuid)
+{
+	struct amo *amos = (struct amo *)__va(amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+					      sizeof(struct amo)));	/* first ACTIVATE amo in the given page */
+
+	(void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
+			       BIT_MASK(from_nasid / 2), to_nasid,
+			       to_phys_cpuid, SGI_XPC_ACTIVATE);	/* result is deliberately ignored */
+}
+
+static void
+xpc_send_local_activate_IRQ_sn2(int from_nasid)
+{
+	unsigned long irq_flags;
+	struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+					      sizeof(struct amo)));	/* first ACTIVATE amo in our own page */
+
+	/*
+	 * Fake the sending and receipt of an activate IRQ from remote nasid:
+	 * set from_nasid's bit in our own activate amo, then do the same
+	 * bookkeeping xpc_handle_activate_IRQ_sn2() does (bump the received
+	 * count under its lock and wake the waiters).
+	 */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
+			 FETCHOP_OR, BIT_MASK(from_nasid / 2));
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+/*
+ * Functions associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Check to see if any chctl flags were sent from the specified partition.
+ *
+ * The partition's local chctl amo is fetched via xpc_receive_IRQ_amo_sn2().
+ * If any flag was set, the flags are OR'ed into part->chctl under
+ * part->chctl_lock and the partition's channel manager is woken up.
+ */
+static void
+xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
+{
+	union xpc_channel_ctl_flags chctl;
+	unsigned long irq_flags;
+
+	chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
+						  local_chctl_amo_va);
+	if (chctl.all_flags == 0)
+		return;	/* nothing was sent */
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.all_flags |= chctl.all_flags;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
+		"0x%llx\n", XPC_PARTID(part), chctl.all_flags);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it.
Since SGI_XPC_NOTIFY IRQs may be shared by more + * than one partition, we use an amo structure per partition to indicate + * whether a partition has sent an IRQ or not. If it has, then wake up the + * associated kthread to handle it. + * + * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC + * running on other partitions. + * + * Noteworthy Arguments: + * + * irq - Interrupt ReQuest number. NOT USED. + * + * dev_id - partid of IRQ's potential sender. + */ +static irqreturn_t +xpc_handle_notify_IRQ_sn2(int irq, void *dev_id) +{ + short partid = (short)(u64)dev_id; + struct xpc_partition *part = &xpc_partitions[partid]; + + DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2); + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + xpc_part_deref(part); + } + return IRQ_HANDLED; +} + +/* + * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor + * because the write to their associated amo variable completed after the IRQ + * was received. + */ +static void +xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + part_sn2->dropped_notify_IRQ_timer.expires = jiffies + + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(&part_sn2->dropped_notify_IRQ_timer); + xpc_part_deref(part); + } +} + +/* + * Send a notify IRQ to the remote partition that is associated with the + * specified channel. 
+ */ +static void +xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + union xpc_channel_ctl_flags chctl = { 0 }; + enum xp_retval ret; + + if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) { + chctl.flags[ch->number] = chctl_flag; + ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va, + chctl.all_flags, + part_sn2->notify_IRQ_nasid, + part_sn2->notify_IRQ_phys_cpuid, + SGI_XPC_NOTIFY); + dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", + chctl_flag_string, ch->partid, ch->number, ret); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + XPC_DEACTIVATE_PARTITION(part, ret); + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } + } +} + +#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \ + xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f) + +/* + * Make it look like the remote partition, which is associated with the + * specified channel, sent us a notify IRQ. This faked IRQ will be handled + * by xpc_check_for_dropped_notify_IRQ_sn2(). 
+ */ +static void +xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + union xpc_channel_ctl_flags chctl = { 0 }; + + chctl.flags[ch->number] = chctl_flag; + FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va-> + variable), FETCHOP_OR, chctl.all_flags); + dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", + chctl_flag_string, ch->partid, ch->number); +} + +#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \ + xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f) + +static void +xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->reason = ch->reason; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags); +} + +static void +xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags); +} + +static void +xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->entry_size = ch->entry_size; + args->local_nentries = ch->local_nentries; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags); +} + +static void +xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->remote_nentries = ch->remote_nentries; + args->local_nentries = ch->local_nentries; + args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue); + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); +} + +static void +xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); +} + +static void +xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) +{ + 
XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); +} + +static void +xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch, + unsigned long msgqueue_pa) +{ + ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa; + return xpSuccess; +} + +/* + * This next set of functions are used to keep track of when a partition is + * potentially engaged in accessing memory belonging to another partition. + */ + +static void +xpc_indicate_partition_engaged_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. 
+ */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static void +xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've cleared our + * bit in their engaged partitions amo. 
+ */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_assume_partition_disengaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* clear bit(s) based on partid mask in our partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(partid)); +} + +static int +xpc_partition_engaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +static int +xpc_any_partition_engaged_sn2(void) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable */ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0; +} + +/* original protection values for each node */ +static u64 xpc_prot_vec_sn2[MAX_NUMNODES]; + +/* + * Change protections to allow amo operations on non-Shub 1.1 systems. + */ +static enum xp_retval +xpc_allow_amo_ops_sn2(struct amo *amos_page) +{ + enum xp_retval ret = xpSuccess; + + /* + * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST + * collides with memory operations. On those systems we call + * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. + */ + if (!enable_shub_wars_1_1()) + ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE); + + return ret; +} + +/* + * Change protections to allow amo operations on Shub 1.1 systems. 
+ */ +static void +xpc_allow_amo_ops_shub_wars_1_1_sn2(void) +{ + int node; + int nasid; + + if (!enable_shub_wars_1_1()) + return; + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + /* save current protection values */ + xpc_prot_vec_sn2[node] = + (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0)); + /* open up everything */ + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + -1UL); + } +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + + status = sn_partition_reserved_page_pa((u64)buf, cookie, + (u64 *)rp_pa, (u64 *)len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + + return ret; +} + + +static int +xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) +{ + struct amo *amos_page; + int i; + int ret; + + xpc_vars_sn2 = XPC_RP_VARS(rp); + + rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); + + /* vars_part array follows immediately after vars */ + xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + + XPC_RP_VARS_SIZE); + + /* + * Before clearing xpc_vars_sn2, see if a page of amos had been + * previously allocated. If not we'll need to allocate one and set + * permissions so that cross-partition amos are allowed. + * + * The allocated amo page needs MCA reporting to remain disabled after + * XPC has unloaded. To make this work, we keep a copy of the pointer + * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure, + * which is pointed to by the reserved page, and re-use that saved copy + * on subsequent loads of XPC. This amo page is never freed, and its + * memory protections are never restricted. 
+ */ + amos_page = xpc_vars_sn2->amos_page; + if (amos_page == NULL) { + amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1)); + if (amos_page == NULL) { + dev_err(xpc_part, "can't allocate page of amos\n"); + return -ENOMEM; + } + + /* + * Open up amo-R/W to cpu. This is done on Shub 1.1 systems + * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called. + */ + ret = xpc_allow_amo_ops_sn2(amos_page); + if (ret != xpSuccess) { + dev_err(xpc_part, "can't allow amo operations\n"); + uncached_free_page(__IA64_UNCACHED_OFFSET | + TO_PHYS((u64)amos_page), 1); + return -EPERM; + } + } + + /* clear xpc_vars_sn2 */ + memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2)); + + xpc_vars_sn2->version = XPC_V_VERSION; + xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0); + xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0); + xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2); + xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page); + xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */ + + /* clear xpc_vars_part_sn2 */ + memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) * + XP_MAX_NPARTITIONS_SN2); + + /* initialize the activate IRQ related amo variables */ + for (i = 0; i < xpc_nasid_mask_nlongs; i++) + (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i); + + /* initialize the engaged remote partitions related amo variables */ + (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2); + (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2); + + return 0; +} + +static int +xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static void +xpc_allow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void 
+xpc_disallow_all_hbs_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); +} + +static void +xpc_increment_heartbeat_sn2(void) +{ + xpc_vars_sn2->heartbeat++; +} + +static void +xpc_offline_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 1; +} + +static void +xpc_online_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 0; +} + +static void +xpc_heartbeat_init_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + xpc_online_heartbeat_sn2(); +} + +static void +xpc_heartbeat_exit_sn2(void) +{ + xpc_offline_heartbeat_sn2(); +} + +static enum xp_retval +xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) +{ + struct xpc_vars_sn2 *remote_vars; + enum xp_retval ret; + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + /* pull the remote vars structure that contains the heartbeat */ + ret = xp_remote_memcpy(xp_pa(remote_vars), + part->sn.sn2.remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, " + "heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part), + remote_vars->heartbeat, part->last_heartbeat, + remote_vars->heartbeat_offline, + remote_vars->heartbeating_to_mask[0]); + + if ((remote_vars->heartbeat == part->last_heartbeat && + !remote_vars->heartbeat_offline) || + !xpc_hb_allowed_sn2(sn_partition_id, + remote_vars->heartbeating_to_mask)) { + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = remote_vars->heartbeat; + } + + return ret; +} + +/* + * Get a copy of the remote partition's XPC variables from the reserved page. + * + * remote_vars points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_RP_VARS_SIZE. 
+ */ +static enum xp_retval +xpc_get_remote_vars_sn2(unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + enum xp_retval ret; + + if (remote_vars_pa == 0) + return xpVarsNotSet; + + /* pull over the cross partition variables */ + ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + if (XPC_VERSION_MAJOR(remote_vars->version) != + XPC_VERSION_MAJOR(XPC_V_VERSION)) { + return xpBadVersion; + } + + return xpSuccess; +} + +static void +xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, int nasid) +{ + xpc_send_local_activate_IRQ_sn2(nasid); +} + +static void +xpc_request_partition_reactivation_sn2(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid); +} + +static void +xpc_request_partition_deactivation_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've set our + * bit in their deactivate request amo. 
+ */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static int +xpc_partition_deactivation_requested_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_DEACTIVATE_REQUEST_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +/* + * Update the remote partition's info. 
+ */ +static void +xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version, + unsigned long *remote_rp_ts_jiffies, + unsigned long remote_rp_pa, + unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + + part->remote_rp_version = remote_rp_version; + dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", + part->remote_rp_version); + + part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies; + dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n", + part->remote_rp_ts_jiffies); + + part->remote_rp_pa = remote_rp_pa; + dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); + + part_sn2->remote_vars_pa = remote_vars_pa; + dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", + part_sn2->remote_vars_pa); + + part->last_heartbeat = remote_vars->heartbeat - 1; + dev_dbg(xpc_part, " last_heartbeat = 0x%016llx\n", + part->last_heartbeat); + + part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa; + dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", + part_sn2->remote_vars_part_pa); + + part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid; + dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n", + part_sn2->activate_IRQ_nasid); + + part_sn2->activate_IRQ_phys_cpuid = + remote_vars->activate_IRQ_phys_cpuid; + dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n", + part_sn2->activate_IRQ_phys_cpuid); + + part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa; + dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", + part_sn2->remote_amos_page_pa); + + part_sn2->remote_vars_version = remote_vars->version; + dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", + part_sn2->remote_vars_version); +} + +/* + * Prior code has determined the nasid which generated a activate IRQ. + * Inspect that nasid to determine if its partition needs to be activated + * or deactivated. 
+ * + * A partition is considered "awaiting activation" if our partition + * flags indicate it is not active and it has a heartbeat. A + * partition is considered "awaiting deactivation" if our partition + * flags indicate it is active but it has no heartbeat or it is not + * sending its heartbeat to us. + * + * To determine the heartbeat, the remote nasid must have a properly + * initialized reserved page. + */ +static void +xpc_identify_activate_IRQ_req_sn2(int nasid) +{ + struct xpc_rsvd_page *remote_rp; + struct xpc_vars_sn2 *remote_vars; + unsigned long remote_rp_pa; + unsigned long remote_vars_pa; + int remote_rp_version; + int reactivate = 0; + unsigned long remote_rp_ts_jiffies = 0; + short partid; + struct xpc_partition *part; + struct xpc_partition_sn2 *part_sn2; + enum xp_retval ret; + + /* pull over the reserved page structure */ + + remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get reserved page from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + return; + } + + remote_vars_pa = remote_rp->sn.sn2.vars_pa; + remote_rp_version = remote_rp->version; + remote_rp_ts_jiffies = remote_rp->ts_jiffies; + + partid = remote_rp->SAL_partid; + part = &xpc_partitions[partid]; + part_sn2 = &part->sn.sn2; + + /* pull over the cross partition variables */ + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + return; + } + + part->activate_IRQ_rcvd++; + + dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " + "%lld:0x%lx\n", (int)nasid, (int)partid, + part->activate_IRQ_rcvd, + remote_vars->heartbeat, 
remote_vars->heartbeating_to_mask[0]); + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + + if (xpc_partition_deactivation_requested_sn2(partid)) { + /* + * Other side is waiting on us to deactivate even though + * we already have. + */ + return; + } + + xpc_activate_partition(part); + return; + } + + DBUG_ON(part->remote_rp_version == 0); + DBUG_ON(part_sn2->remote_vars_version == 0); + + if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) { + + /* the other side rebooted */ + + DBUG_ON(xpc_partition_engaged_sn2(partid)); + DBUG_ON(xpc_partition_deactivation_requested_sn2(partid)); + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + reactivate = 1; + } + + if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) { + /* still waiting on other side to disengage from us */ + return; + } + + if (reactivate) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + else if (xpc_partition_deactivation_requested_sn2(partid)) + XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); +} + +/* + * Loop through the activation amo variables and process any bits + * which are set. Each bit indicates a nasid sending a partition + * activation or deactivation request. + * + * Return #of IRQs detected. 
+ */ +int +xpc_identify_activate_IRQ_sender_sn2(void) +{ + int l; + int b; + unsigned long nasid_mask_long; + u64 nasid; /* remote nasid */ + int n_IRQs_detected = 0; + struct amo *act_amos; + + act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2; + + /* scan through activate amo variables looking for non-zero entries */ + for (l = 0; l < xpc_nasid_mask_nlongs; l++) { + + if (xpc_exiting) + break; + + nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]); + + b = find_first_bit(&nasid_mask_long, BITS_PER_LONG); + if (b >= BITS_PER_LONG) { + /* no IRQs from nasids in this amo variable */ + continue; + } + + dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l, + nasid_mask_long); + + /* + * If this nasid has been added to the machine since + * our partition was reset, this will retain the + * remote nasid in our reserved pages machine mask. + * This is used in the event of module reload. + */ + xpc_mach_nasids[l] |= nasid_mask_long; + + /* locate the nasid(s) which sent interrupts */ + + do { + n_IRQs_detected++; + nasid = (l * BITS_PER_LONG + b) * 2; + dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid); + xpc_identify_activate_IRQ_req_sn2(nasid); + + b = find_next_bit(&nasid_mask_long, BITS_PER_LONG, + b + 1); + } while (b < BITS_PER_LONG); + } + return n_IRQs_detected; +} + +static void +xpc_process_activate_IRQ_rcvd_sn2(void) +{ + unsigned long irq_flags; + int n_IRQs_expected; + int n_IRQs_detected; + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + n_IRQs_expected = xpc_activate_IRQ_rcvd; + xpc_activate_IRQ_rcvd = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2(); + if (n_IRQs_detected < n_IRQs_expected) { + /* retry once to help avoid missing amo */ + (void)xpc_identify_activate_IRQ_sender_sn2(); + } +} + +/* + * Setup the channel structures that are sn2 specific. 
+ */ +static enum xp_retval +xpc_setup_ch_structures_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + struct xpc_channel_sn2 *ch_sn2; + enum xp_retval retval; + int ret; + int cpuid; + int ch_number; + struct timer_list *timer; + short partid = XPC_PARTID(part); + + /* allocate all the required GET/PUT values */ + + part_sn2->local_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->local_GPs_base); + if (part_sn2->local_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for local get/put " + "values\n"); + return xpNoMemory; + } + + part_sn2->remote_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->remote_GPs_base); + if (part_sn2->remote_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for remote get/put " + "values\n"); + retval = xpNoMemory; + goto out_1; + } + + part_sn2->remote_GPs_pa = 0; + + /* allocate all the required open and close args */ + + part_sn2->local_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part_sn2-> + local_openclose_args_base); + if (part_sn2->local_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for local connect args\n"); + retval = xpNoMemory; + goto out_2; + } + + part_sn2->remote_openclose_args_pa = 0; + + part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid); + + part_sn2->notify_IRQ_nasid = 0; + part_sn2->notify_IRQ_phys_cpuid = 0; + part_sn2->remote_chctl_amo_va = NULL; + + sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid); + ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2, + IRQF_SHARED, part_sn2->notify_IRQ_owner, + (void *)(u64)partid); + if (ret != 0) { + dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " + "errno=%d\n", -ret); + retval = xpLackOfResources; + goto out_3; + } + + /* Setup a timer to check for dropped notify IRQs */ + timer = &part_sn2->dropped_notify_IRQ_timer; + init_timer(timer); + timer->function = + (void (*)(unsigned 
long))xpc_check_for_dropped_notify_IRQ_sn2; + timer->data = (unsigned long)part; + timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(timer); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_sn2 = &part->channels[ch_number].sn.sn2; + + ch_sn2->local_GP = &part_sn2->local_GPs[ch_number]; + ch_sn2->local_openclose_args = + &part_sn2->local_openclose_args[ch_number]; + + mutex_init(&ch_sn2->msg_to_pull_mutex); + } + + /* + * Setup the per partition specific variables required by the + * remote partition to establish channel connections with us. + * + * The setting of the magic # indicates that these per partition + * specific variables are ready to be used. + */ + xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs); + xpc_vars_part_sn2[partid].openclose_args_pa = + xp_pa(part_sn2->local_openclose_args); + xpc_vars_part_sn2[partid].chctl_amo_pa = + xp_pa(part_sn2->local_chctl_amo_va); + cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ + xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid); + xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid = + cpu_physical_id(cpuid); + xpc_vars_part_sn2[partid].nchannels = part->nchannels; + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2; + + return xpSuccess; + + /* setup of ch structures failed */ +out_3: + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; +out_2: + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; +out_1: + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + return retval; +} + +/* + * Teardown the channel structures that are sn2 specific. + */ +static void +xpc_teardown_ch_structures_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + short partid = XPC_PARTID(part); + + /* + * Indicate that the variables specific to the remote partition are no + * longer available for its use. 
+ */ + xpc_vars_part_sn2[partid].magic = 0; + + /* in case we've still got outstanding timers registered... */ + del_timer_sync(&part_sn2->dropped_notify_IRQ_timer); + free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid); + + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + part_sn2->local_chctl_amo_va = NULL; +} + +/* + * Create a wrapper that hides the underlying mechanism for pulling a cacheline + * (or multiple cachelines) from a remote partition. + * + * src_pa must be a cacheline aligned physical address on the remote partition. + * dst must be a cacheline aligned virtual address on this partition. + * cnt must be cacheline sized + */ +/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */ +static enum xp_retval +xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst, + const unsigned long src_pa, size_t cnt) +{ + enum xp_retval ret; + + DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa)); + DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst)); + DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + + ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed," + " ret=%d\n", XPC_PARTID(part), ret); + } + return ret; +} + +/* + * Pull the remote per partition specific variables from the specified + * partition. 
+ */ +static enum xp_retval +xpc_pull_remote_vars_part_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + u8 buffer[L1_CACHE_BYTES * 2]; + struct xpc_vars_part_sn2 *pulled_entry_cacheline = + (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer); + struct xpc_vars_part_sn2 *pulled_entry; + unsigned long remote_entry_cacheline_pa; + unsigned long remote_entry_pa; + short partid = XPC_PARTID(part); + enum xp_retval ret; + + /* pull the cacheline that contains the variables we're interested in */ + + DBUG_ON(part_sn2->remote_vars_part_pa != + L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa)); + DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2); + + remote_entry_pa = part_sn2->remote_vars_part_pa + + sn_partition_id * sizeof(struct xpc_vars_part_sn2); + + remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); + + pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline + + (remote_entry_pa & + (L1_CACHE_BYTES - 1))); + + ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline, + remote_entry_cacheline_pa, + L1_CACHE_BYTES); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "failed to pull XPC vars_part from " + "partition %d, ret=%d\n", partid, ret); + return ret; + } + + /* see if they've been set up yet */ + + if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 && + pulled_entry->magic != XPC_VP_MAGIC2_SN2) { + + if (pulled_entry->magic != 0) { + dev_dbg(xpc_chan, "partition %d's XPC vars_part for " + "partition %d has bad magic value (=0x%llx)\n", + partid, sn_partition_id, pulled_entry->magic); + return xpBadMagic; + } + + /* they've not been initialized yet */ + return xpRetry; + } + + if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) { + + /* validate the variables */ + + if (pulled_entry->GPs_pa == 0 || + pulled_entry->openclose_args_pa == 0 || + pulled_entry->chctl_amo_pa == 0) { + + dev_err(xpc_chan, "partition %d's XPC vars_part for " + "partition %d are not valid\n", 
partid, + sn_partition_id); + return xpInvalidAddress; + } + + /* the variables we imported look to be valid */ + + part_sn2->remote_GPs_pa = pulled_entry->GPs_pa; + part_sn2->remote_openclose_args_pa = + pulled_entry->openclose_args_pa; + part_sn2->remote_chctl_amo_va = + (struct amo *)__va(pulled_entry->chctl_amo_pa); + part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid; + part_sn2->notify_IRQ_phys_cpuid = + pulled_entry->notify_IRQ_phys_cpuid; + + if (part->nchannels > pulled_entry->nchannels) + part->nchannels = pulled_entry->nchannels; + + /* let the other side know that we've pulled their variables */ + + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2; + } + + if (pulled_entry->magic == XPC_VP_MAGIC1_SN2) + return xpRetry; + + return xpSuccess; +} + +/* + * Establish first contact with the remote partititon. This involves pulling + * the XPC per partition variables from the remote partition and waiting for + * the remote partition to pull ours. + */ +static enum xp_retval +xpc_make_first_contact_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + enum xp_retval ret; + + /* + * Register the remote partition's amos with SAL so it can handle + * and cleanup errors within that address range should the remote + * partition go down. We don't unregister this range because it is + * difficult to tell when outstanding writes to the remote partition + * are finished and thus when it is safe to unregister. This should + * not result in wasted space in the SAL xp_addr_region table because + * we should get the same page for remote_amos_page_pa after module + * reloads and system reboots. 
+ */ + if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_warn(xpc_part, "xpc_activating(%d) failed to register " + "xp_addr region\n", XPC_PARTID(part)); + + ret = xpPhysAddrRegFailed; + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + /* + * Send activate IRQ to get other side to activate if they've not + * already begun to do so. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); + + while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) { + if (ret != xpRetry) { + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +/* + * Get the chctl flags and pull the openclose args and/or remote GPs as needed. + */ +static u64 +xpc_get_chctl_all_flags_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + enum xp_retval ret; + + /* + * See if there are any chctl flags to be handled. 
+ */ + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + if (xpc_any_openclose_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part-> + remote_openclose_args, + part_sn2-> + remote_openclose_args_pa, + XPC_OPENCLOSE_ARGS_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull openclose args from " + "partition %d, ret=%d\n", XPC_PARTID(part), + ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + if (xpc_any_msg_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs, + part_sn2->remote_GPs_pa, + XPC_GP_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull GPs from partition " + "%d, ret=%d\n", XPC_PARTID(part), ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + return chctl.all_flags; +} + +/* + * Allocate the local message queue and the notify queue. 
+ */ +static enum xp_retval +xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->local_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, + &ch_sn2->local_msgqueue_base); + if (ch_sn2->local_msgqueue == NULL) + continue; + + nbytes = nentries * sizeof(struct xpc_notify_sn2); + ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL); + if (ch_sn2->notify_queue == NULL) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + continue; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->local_nentries, ch->partid, ch->number); + + ch->local_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for local message queue and notify " + "queue, partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate the cached remote message queue. 
+ */ +static enum xp_retval +xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + DBUG_ON(ch->remote_nentries <= 0); + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->remote_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2-> + remote_msgqueue_base); + if (ch_sn2->remote_msgqueue == NULL) + continue; + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->remote_nentries, ch->partid, ch->number); + + ch->remote_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate message queues and other stuff associated with a channel. + * + * Note: Assumes all of the channel sizes are filled in. + */ +static enum xp_retval +xpc_setup_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + enum xp_retval ret; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ret = xpc_allocate_local_msgqueue_sn2(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_remote_msgqueue_sn2(ch); + if (ret != xpSuccess) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } + } + return ret; +} + +/* + * Free up message queues and other stuff that were allocated for the specified + * channel. 
+ */ +static void +xpc_teardown_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + ch_sn2->remote_msgqueue_pa = 0; + + ch_sn2->local_GP->get = 0; + ch_sn2->local_GP->put = 0; + ch_sn2->remote_GP.get = 0; + ch_sn2->remote_GP.put = 0; + ch_sn2->w_local_GP.get = 0; + ch_sn2->w_local_GP.put = 0; + ch_sn2->w_remote_GP.get = 0; + ch_sn2->w_remote_GP.put = 0; + ch_sn2->next_msg_to_pull = 0; + + if (ch->flags & XPC_C_SETUP) { + dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", + ch->flags, ch->partid, ch->number); + + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->remote_msgqueue_base); + ch_sn2->remote_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } +} + +/* + * Notify those who wanted to be notified upon delivery of their message. + */ +static void +xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put) +{ + struct xpc_notify_sn2 *notify; + u8 notify_type; + s64 get = ch->sn.sn2.w_remote_GP.get - 1; + + while (++get < put && atomic_read(&ch->n_to_notify) > 0) { + + notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries]; + + /* + * See if the notify entry indicates it was associated with + * a message who's sender wants to be notified. It is possible + * that it is, but someone else is doing or has done the + * notification. 
+ */ + notify_type = notify->type; + if (notify_type == 0 || + cmpxchg(¬ify->type, notify_type, 0) != notify_type) { + continue; + } + + DBUG_ON(notify_type != XPC_N_CALL); + + atomic_dec(&ch->n_to_notify); + + if (notify->func != NULL) { + dev_dbg(xpc_chan, "notify->func() called, notify=0x%p " + "msg_number=%lld partid=%d channel=%d\n", + (void *)notify, get, ch->partid, ch->number); + + notify->func(reason, ch->partid, ch->number, + notify->key); + + dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p" + " msg_number=%lld partid=%d channel=%d\n", + (void *)notify, get, ch->partid, ch->number); + } + } +} + +static void +xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch) +{ + xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put); +} + +/* + * Clear some of the msg flags in the local message queue. + */ +static inline void +xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get; + + get = ch_sn2->w_remote_GP.get; + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + + (get % ch->local_nentries) * + ch->entry_size); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + msg->flags = 0; + } while (++get < ch_sn2->remote_GP.get); +} + +/* + * Clear some of the msg flags in the remote message queue. 
+ */ +static inline void +xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put, remote_nentries = ch->remote_nentries; + + /* flags are zeroed when the buffer is allocated */ + if (ch_sn2->remote_GP.put < remote_nentries) + return; + + put = max(ch_sn2->w_remote_GP.put, remote_nentries); + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + (put % remote_nentries) * + ch->entry_size); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + DBUG_ON(!(msg->flags & XPC_M_SN2_DONE)); + DBUG_ON(msg->number != put - remote_nentries); + msg->flags = 0; + } while (++put < ch_sn2->remote_GP.put); +} + +static int +xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch) +{ + return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get; +} + +static void +xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + int npayloads_sent; + + ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number]; + + /* See what, if anything, has changed for each connected channel */ + + xpc_msgqueue_ref(ch); + + if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get && + ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) { + /* nothing changed since GPs were last pulled */ + xpc_msgqueue_deref(ch); + return; + } + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* + * First check to see if messages recently sent by us have been + * received by the other side. (The remote GET value will have + * changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) { + + /* + * We need to notify any senders that want to be notified + * that their sent messages have been received by their + * intended recipients. 
We need to do this before updating + * w_remote_GP.get so that we don't allocate the same message + * queue entries prematurely (see xpc_allocate_msg()). + */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* + * Notify senders that messages sent have been + * received and delivered by the other side. + */ + xpc_notify_senders_sn2(ch, xpMsgDelivered, + ch_sn2->remote_GP.get); + } + + /* + * Clear msg->flags in previously sent messages, so that + * they're ready for xpc_allocate_msg(). + */ + xpc_clear_local_msgqueue_flags_sn2(ch); + + ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get; + + dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid, + ch->number); + + /* + * If anyone was waiting for message queue entries to become + * available, wake them up. + */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + } + + /* + * Now check for newly sent messages by the other side. (The remote + * PUT value will have changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) { + /* + * Clear msg->flags in previously received messages, so that + * they're ready for xpc_get_deliverable_payload_sn2(). 
+ */ + xpc_clear_remote_msgqueue_flags_sn2(ch); + + smp_wmb(); /* ensure flags have been cleared before bte_copy */ + ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put; + + dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid, + ch->number); + + npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch); + if (npayloads_sent > 0) { + dev_dbg(xpc_chan, "msgs waiting to be copied and " + "delivered=%d, partid=%d, channel=%d\n", + npayloads_sent, ch->partid, ch->number); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) + xpc_activate_kthreads(ch, npayloads_sent); + } + } + + xpc_msgqueue_deref(ch); +} + +static struct xpc_msg_sn2 * +xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long remote_msg_pa; + struct xpc_msg_sn2 *msg; + u32 msg_index; + u32 nmsgs; + u64 msg_offset; + enum xp_retval ret; + + if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) { + /* we were interrupted by a signal */ + return NULL; + } + + while (get >= ch_sn2->next_msg_to_pull) { + + /* pull as many messages as are ready and able to be pulled */ + + msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries; + + DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put); + nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull; + if (msg_index + nmsgs > ch->remote_nentries) { + /* ignore the ones that wrap the msg queue for now */ + nmsgs = ch->remote_nentries - msg_index; + } + + msg_offset = msg_index * ch->entry_size; + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + msg_offset); + remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset; + + ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa, + nmsgs * ch->entry_size); + if (ret != xpSuccess) { + + dev_dbg(xpc_chan, "failed to pull %d msgs starting with" + " msg %lld from partition %d, channel=%d, " + 
"ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull, + ch->partid, ch->number, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + + mutex_unlock(&ch_sn2->msg_to_pull_mutex); + return NULL; + } + + ch_sn2->next_msg_to_pull += nmsgs; + } + + mutex_unlock(&ch_sn2->msg_to_pull_mutex); + + /* return the message we were looking for */ + msg_offset = (get % ch->remote_nentries) * ch->entry_size; + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset); + + return msg; +} + +/* + * Get the next deliverable message's payload. + */ +static void * +xpc_get_deliverable_payload_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + void *payload = NULL; + s64 get; + + do { + if (ch->flags & XPC_C_DISCONNECTING) + break; + + get = ch_sn2->w_local_GP.get; + smp_rmb(); /* guarantee that .get loads before .put */ + if (get == ch_sn2->w_remote_GP.put) + break; + + /* There are messages waiting to be pulled and delivered. + * We need to try to secure one for ourselves. We'll do this + * by trying to increment w_local_GP.get and hope that no one + * else beats us to it. If they do, we'll we'll simply have + * to try again for the next one. + */ + + if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) { + /* we got the entry referenced by get */ + + dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, " + "partid=%d, channel=%d\n", get + 1, + ch->partid, ch->number); + + /* pull the message from the remote partition */ + + msg = xpc_pull_remote_msg_sn2(ch, get); + + if (msg != NULL) { + DBUG_ON(msg->number != get); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + + payload = &msg->payload; + } + break; + } + + } while (1); + + return payload; +} + +/* + * Now we actually send the messages that are ready to be sent by advancing + * the local message queue's Put value and then send a chctl msgrequest to the + * recipient partition. 
+ */ +static void +xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put = initial_put + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (put == ch_sn2->w_local_GP.put) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + local_msgqueue + (put % + ch->local_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_READY)) + break; + + put++; + } + + if (put == initial_put) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) != + initial_put) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->put < initial_put); + break; + } + + /* we just set the new value of local_GP->put */ + + dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, " + "channel=%d\n", put, ch->partid, ch->number); + + send_msgrequest = 1; + + /* + * We need to ensure that the message referenced by + * local_GP->put is not XPC_M_SN2_READY or that local_GP->put + * equals w_local_GP.put, so we'll go have a look. + */ + initial_put = put; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. + */ +static enum xp_retval +xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags, + struct xpc_msg_sn2 **address_of_msg) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + enum xp_retval ret; + s64 put; + + /* + * Get the next available message entry from the local message queue. + * If none are available, we'll make sure that we grab the latest + * GP values. + */ + ret = xpTimeout; + + while (1) { + + put = ch_sn2->w_local_GP.put; + smp_rmb(); /* guarantee that .put loads before .get */ + if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) { + + /* There are available message entries. We need to try + * to secure one for ourselves. 
We'll do this by trying + * to increment w_local_GP.put as long as someone else + * doesn't beat us to it. If they do, we'll have to + * try again. + */ + if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) == + put) { + /* we got the entry referenced by put */ + break; + } + continue; /* try again */ + } + + /* + * There aren't any available msg entries at this time. + * + * In waiting for a message entry to become available, + * we set a timeout in case the other side is not sending + * completion interrupts. This lets us fake a notify IRQ + * that will cause the notify IRQ handler to fetch the latest + * GP values as if an interrupt was sent by the other side. + */ + if (ret == xpTimeout) + xpc_send_chctl_local_msgrequest_sn2(ch); + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + /* get the message's address and initialize it */ + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + + (put % ch->local_nentries) * + ch->entry_size); + + DBUG_ON(msg->flags != 0); + msg->number = put; + + dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, " + "msg_number=%lld, partid=%d, channel=%d\n", put + 1, + (void *)msg, msg->number, ch->partid, ch->number); + + *address_of_msg = msg; + return xpSuccess; +} + +/* + * Common code that does the actual sending of the message by advancing the + * local message queue's Put value and sends a chctl msgrequest to the + * partition the message is being sent to. 
+ */ +static enum xp_retval +xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg = msg; + struct xpc_notify_sn2 *notify = notify; + s64 msg_number; + s64 put; + + DBUG_ON(notify_type == XPC_N_CALL && func == NULL); + + if (XPC_MSG_SIZE(payload_size) > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_sn2(ch, flags, &msg); + if (ret != xpSuccess) + goto out_1; + + msg_number = msg->number; + + if (notify_type != 0) { + /* + * Tell the remote side to send an ACK interrupt when the + * message has been delivered. + */ + msg->flags |= XPC_M_SN2_INTERRUPT; + + atomic_inc(&ch->n_to_notify); + + notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries]; + notify->func = func; + notify->key = key; + notify->type = notify_type; + + /* ??? Is a mb() needed here? */ + + if (ch->flags & XPC_C_DISCONNECTING) { + /* + * An error occurred between our last error check and + * this one. We will try to clear the type field from + * the notify entry. If we succeed then + * xpc_disconnect_channel() didn't already process + * the notify entry. + */ + if (cmpxchg(¬ify->type, notify_type, 0) == + notify_type) { + atomic_dec(&ch->n_to_notify); + ret = ch->reason; + } + goto out_1; + } + } + + memcpy(&msg->payload, payload, payload_size); + + msg->flags |= XPC_M_SN2_READY; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->put. 
+ */ + smp_mb(); + + /* see if the message is next in line to be sent, if so send it */ + + put = ch_sn2->local_GP->put; + if (put == msg_number) + xpc_send_msgs_sn2(ch, put); + +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Now we actually acknowledge the messages that have been delivered and ack'd + * by advancing the cached remote message queue's Get value and if requested + * send a chctl msgrequest to the message sender's partition. + * + * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition + * that sent the message. + */ +static void +xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get = initial_get + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (get == ch_sn2->w_local_GP.get) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + remote_msgqueue + (get % + ch->remote_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_DONE)) + break; + + msg_flags |= msg->flags; + get++; + } + + if (get == initial_get) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) != + initial_get) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->get <= initial_get); + break; + } + + /* we just set the new value of local_GP->get */ + + dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, " + "channel=%d\n", get, ch->partid, ch->number); + + send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT); + + /* + * We need to ensure that the message referenced by + * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get + * equals w_local_GP.get, so we'll go have a look. 
+ */ + initial_get = get; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +static void +xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) +{ + struct xpc_msg_sn2 *msg; + s64 msg_number; + s64 get; + + msg = container_of(payload, struct xpc_msg_sn2, payload); + msg_number = msg->number; + + dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n", + (void *)msg, msg_number, ch->partid, ch->number); + + DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) != + msg_number % ch->remote_nentries); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + + msg->flags |= XPC_M_SN2_DONE; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->get. + */ + smp_mb(); + + /* + * See if this message is next in line to be acknowledged as having + * been delivered. + */ + get = ch->sn.sn2.local_GP->get; + if (get == msg_number) + xpc_acknowledge_msgs_sn2(ch, get, msg->flags); +} + +static struct xpc_arch_operations xpc_arch_ops_sn2 = { + .setup_partitions = xpc_setup_partitions_sn2, + .teardown_partitions = xpc_teardown_partitions_sn2, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, + .setup_rsvd_page = xpc_setup_rsvd_page_sn2, + + .allow_hb = xpc_allow_hb_sn2, + .disallow_hb = xpc_disallow_hb_sn2, + .disallow_all_hbs = xpc_disallow_all_hbs_sn2, + .increment_heartbeat = xpc_increment_heartbeat_sn2, + .offline_heartbeat = xpc_offline_heartbeat_sn2, + .online_heartbeat = xpc_online_heartbeat_sn2, + .heartbeat_init = xpc_heartbeat_init_sn2, + .heartbeat_exit = xpc_heartbeat_exit_sn2, + .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, + + .request_partition_activation = + xpc_request_partition_activation_sn2, + .request_partition_reactivation = + xpc_request_partition_reactivation_sn2, + .request_partition_deactivation = + 
xpc_request_partition_deactivation_sn2, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2, + + .setup_ch_structures = xpc_setup_ch_structures_sn2, + .teardown_ch_structures = xpc_teardown_ch_structures_sn2, + + .make_first_contact = xpc_make_first_contact_sn2, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, + .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, + .send_chctl_closereply = xpc_send_chctl_closereply_sn2, + .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, + .send_chctl_openreply = xpc_send_chctl_openreply_sn2, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, + + .setup_msg_structures = xpc_setup_msg_structures_sn2, + .teardown_msg_structures = xpc_teardown_msg_structures_sn2, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, + .partition_engaged = xpc_partition_engaged_sn2, + .any_partition_engaged = xpc_any_partition_engaged_sn2, + .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2, + .send_payload = xpc_send_payload_sn2, + .get_deliverable_payload = xpc_get_deliverable_payload_sn2, + .received_payload = xpc_received_payload_sn2, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, +}; + +int +xpc_init_sn2(void) +{ + int ret; + size_t buf_size; + + xpc_arch_ops = xpc_arch_ops_sn2; + + if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " + "larger than %d\n", XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + buf_size = max(XPC_RP_VARS_SIZE, + XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2); + xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size, + GFP_KERNEL, + 
&xpc_remote_copy_buffer_base_sn2); + if (xpc_remote_copy_buffer_sn2 == NULL) { + dev_err(xpc_part, "can't get memory for remote copy buffer\n"); + return -ENOMEM; + } + + /* open up protections for IPI and [potentially] amo operations */ + xpc_allow_IPI_ops_sn2(); + xpc_allow_amo_ops_shub_wars_1_1_sn2(); + + /* + * This is safe to do before the xpc_hb_checker thread has started + * because the handler releases a wait queue. If an interrupt is + * received before the thread is waiting, it will not go to sleep, + * but rather immediately process the interrupt. + */ + ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0, + "xpc hb", NULL); + if (ret != 0) { + dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " + "errno=%d\n", -ret); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); + } + return ret; +} + +void +xpc_exit_sn2(void) +{ + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); +} diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c new file mode 100644 index 00000000000..8725d5e8ab0 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -0,0 +1,1749 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) uv-based functions. + * + * Architecture specific implementation of common functions. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <asm/uv/uv_hub.h> +#if defined CONFIG_X86_64 +#include <asm/uv/bios.h> +#include <asm/uv/uv_irq.h> +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#endif +#include "../sgi-gru/gru.h" +#include "../sgi-gru/grukservices.h" +#include "xpc.h" + +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; +#endif + +static struct xpc_heartbeat_uv *xpc_heartbeat_uv; + +#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) +#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_ACTIVATE_MSG_SIZE_UV) +#define XPC_ACTIVATE_IRQ_NAME "xpc_activate" + +#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) +#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_NOTIFY_MSG_SIZE_UV) +#define XPC_NOTIFY_IRQ_NAME "xpc_notify" + +static struct xpc_gru_mq_uv *xpc_activate_mq_uv; +static struct xpc_gru_mq_uv *xpc_notify_mq_uv; + +static int +xpc_setup_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_init(&part_uv->flags_lock); + part_uv->remote_act_state = XPC_P_AS_INACTIVE; + } + return 0; +} + +static void +xpc_teardown_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + unsigned long irq_flags; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + if (part_uv->cached_activate_gru_mq_desc != NULL) { + 
mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + kfree(part_uv->cached_activate_gru_mq_desc); + part_uv->cached_activate_gru_mq_desc = NULL; + mutex_unlock(&part_uv-> + cached_activate_gru_mq_desc_mutex); + } + } +} + +static int +xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) +{ + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); + if (mq->irq < 0) { + dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", + -mq->irq); + return mq->irq; + } + + mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) + mq->irq = SGI_XPC_ACTIVATE; + else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) + mq->irq = SGI_XPC_NOTIFY; + else + return -EINVAL; + + mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value); +#else + #error not a supported configuration +#endif + + return 0; +} + +static void +xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) +{ +#if defined CONFIG_X86_64 + uv_teardown_irq(mq->irq); + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode; + unsigned long mmr_value; + + mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + mmr_value = 1UL << 16; + + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); +#else + #error not a supported configuration +#endif +} + +static int +xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + + ret = sn_mq_watchlist_alloc(mmr_pnode, (void 
*)uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", + ret); + return -EBUSY; + } +#elif defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " + "ret=%d\n", ret); + return ret; + } +#else + #error not a supported configuration +#endif + + mq->watchlist_num = ret; + return 0; +} + +static void +xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != BIOS_STATUS_SUCCESS); +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != SALRET_OK); +#else + #error not a supported configuration +#endif +} + +static struct xpc_gru_mq_uv * +xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, + irq_handler_t irq_handler) +{ + enum xp_retval xp_ret; + int ret; + int nid; + int nasid; + int pg_order; + struct page *page; + struct xpc_gru_mq_uv *mq; + struct uv_IO_APIC_route_entry *mmr_value; + + mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL); + if (mq == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a xpc_gru_mq_uv structure\n"); + ret = -ENOMEM; + goto out_0; + } + + mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc), + GFP_KERNEL); + if (mq->gru_mq_desc == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a gru_message_queue_desc structure\n"); + ret = -ENOMEM; + goto out_1; + } + + pg_order = get_order(mq_size); + mq->order = pg_order + PAGE_SHIFT; + mq_size = 1UL << mq->order; + + mq->mmr_blade = uv_cpu_to_blade_id(cpu); + + nid = cpu_to_node(cpu); + page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | 
GFP_THISNODE, + pg_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); + ret = -ENOMEM; + goto out_2; + } + mq->address = page_address(page); + + /* enable generation of irq when GRU mq operation occurs to this mq */ + ret = xpc_gru_mq_watchlist_alloc_uv(mq); + if (ret != 0) + goto out_3; + + ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name); + if (ret != 0) + goto out_4; + + ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL); + if (ret != 0) { + dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", + mq->irq, -ret); + goto out_5; + } + + nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu)); + /* view the saved MMR value as a route entry to read vector and dest */ + mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value; + ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size, + nasid, mmr_value->vector, mmr_value->dest); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); + ret = -EINVAL; + goto out_6; + } + + /* allow other partitions to access this GRU mq */ + xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size); + if (xp_ret != xpSuccess) { + ret = -EACCES; + goto out_6; + } + + return mq; + + /* something went wrong; each label undoes one setup step, newest first */ +out_6: + free_irq(mq->irq, NULL); +out_5: + xpc_release_gru_mq_irq_uv(mq); +out_4: + xpc_gru_mq_watchlist_free_uv(mq); +out_3: + free_pages((unsigned long)mq->address, pg_order); +out_2: + kfree(mq->gru_mq_desc); +out_1: + kfree(mq); +out_0: + return ERR_PTR(ret); +} +/* + * Tear down a queue built by xpc_create_gru_mq_uv(): re-restrict remote + * access to the queue memory (failure is fatal via BUG_ON), free the irq + * and its mapping, free the watchlist, then release the queue memory. + */ +static void +xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq) +{ + unsigned int mq_size; + int pg_order; + int ret; + + /* disallow other partitions to access GRU mq */ + mq_size = 1UL << mq->order; + ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size); + BUG_ON(ret != xpSuccess); + + /* unregister irq handler and release mq irq/vector mapping */ + free_irq(mq->irq, NULL); + xpc_release_gru_mq_irq_uv(mq); + + /* disable generation of irq when GRU mq op occurs to 
this mq */ + xpc_gru_mq_watchlist_free_uv(mq); + + pg_order = mq->order - PAGE_SHIFT; + free_pages((unsigned long)mq->address, pg_order); + + kfree(mq); +} + +static enum xp_retval +xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg, + size_t msg_size) +{ + enum xp_retval xp_ret; + int ret; + + while (1) { + ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size); + if (ret == MQE_OK) { + xp_ret = xpSuccess; + break; + } + + if (ret == MQE_QUEUE_FULL) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_QUEUE_FULL\n"); + /* !!! handle QLimit reached; delay & try again */ + /* ??? Do we add a limit to the number of retries? */ + (void)msleep_interruptible(10); + } else if (ret == MQE_CONGESTION) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_CONGESTION\n"); + /* !!! handle LB Overflow; simply try again */ + /* ??? Do we add a limit to the number of retries? */ + } else { + /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */ + dev_err(xpc_chan, "gru_send_message_gpa() returned " + "error=%d\n", ret); + xp_ret = xpGruSendMqError; + break; + } + } + return xp_ret; +} + +static void +xpc_process_activate_IRQ_rcvd_uv(void) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + u8 act_state_req; + + DBUG_ON(xpc_activate_IRQ_rcvd == 0); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part = &xpc_partitions[partid]; + + if (part->sn.uv.act_state_req == 0) + continue; + + xpc_activate_IRQ_rcvd--; + BUG_ON(xpc_activate_IRQ_rcvd < 0); + + act_state_req = part->sn.uv.act_state_req; + part->sn.uv.act_state_req = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + if (act_state_req == XPC_P_ASR_ACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else if (part->act_state == XPC_P_AS_DEACTIVATING) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if 
(act_state_req == XPC_P_ASR_REACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) { + XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason); + + } else { + BUG(); + } + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (xpc_activate_IRQ_rcvd == 0) + break; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + +} +/* + * Process one message taken off the activate mq that was sent by part. + * + * The sender's act_state is always copied into part_uv->remote_act_state; + * the rest of the work depends on msg_hdr->type. *wakeup_hb_checker is + * incremented each time an act_state_req is queued for the partition so + * that the caller knows to wake the waiter on xpc_activate_IRQ_wq. + */ +static void +xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, + struct xpc_activate_mq_msghdr_uv *msg_hdr, + int *wakeup_hb_checker) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_openclose_args *args; + + part_uv->remote_act_state = msg_hdr->act_state; + + switch (msg_hdr->type) { + case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV: + /* syncing of remote_act_state was just done above */ + break; + + case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_activate_req_uv *msg; + + /* + * ??? Do we deal here with ts_jiffies being different + * ??? if act_state != XPC_P_AS_INACTIVE instead of + * ??? below? + */ + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_activate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + /* count the partition only once while a request is still pending */ + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; + part->remote_rp_pa = msg->rp_gpa; /* !!! 
_pa is _gpa */ + part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; + part_uv->heartbeat_gpa = msg->heartbeat_gpa; + + if (msg->activate_gru_mq_desc_gpa != + part_uv->activate_gru_mq_desc_gpa) { + /* + * xpc_activate_IRQ_rcvd_lock is held with interrupts + * already disabled, so take flags_lock with the plain + * lock ops. Reusing irq_flags in a nested + * spin_lock_irqsave() would overwrite the state saved + * for the outer lock. + */ + spin_lock(&part_uv->flags_lock); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock(&part_uv->flags_lock); + part_uv->activate_gru_mq_desc_gpa = + msg->activate_gru_mq_desc_gpa; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + break; + } + case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_deactivate_req_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_deactivate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = msg->reason; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + /* return, not break: skips the rp_ts_jiffies check after the switch */ + return; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_closerequest_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closerequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->reason = msg->reason; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: { + struct xpc_activate_mq_msg_chctl_closereply_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closereply_uv, + hdr); + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case 
XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_openrequest_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openrequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->entry_size = msg->entry_size; + args->local_nentries = msg->local_nentries; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: { + struct xpc_activate_mq_msg_chctl_openreply_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openreply_uv, hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->remote_nentries = msg->remote_nentries; + args->local_nentries = msg->local_nentries; + /* the sender's notify mq descriptor gpa is carried in local_msgqueue_pa */ + args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { + struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + /* + * NOTE(review): there is no break here, so an OPENCOMPLETE + * message also runs the MARK_ENGAGED case below and sets + * XPC_P_ENGAGED_UV -- confirm this fall through is intended. + */ + } + case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + default: + 
dev_err(xpc_part, "received unknown activate_mq msg type=%d " + "from partition=%d\n", msg_hdr->type, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadMsgType; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + /* + * A message whose rp_ts_jiffies differs from the non-zero value recorded + * for the partition queues a reactivate request. + */ + if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies && + part->remote_rp_ts_jiffies != 0) { + /* + * ??? Does what we do here need to be sensitive to + * ??? act_state or remote_act_state? + */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + } +} +/* + * Interrupt handler for the activate mq. Drains every message currently in + * xpc_activate_mq_uv, handing those with a valid partid to + * xpc_handle_activate_mq_msg_uv(), and wakes xpc_activate_IRQ_wq if any + * message asked for it. Always returns IRQ_HANDLED. + */ +static irqreturn_t +xpc_handle_activate_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr; + short partid; + struct xpc_partition *part; + int wakeup_hb_checker = 0; + int part_referenced; + + while (1) { + msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc); + if (msg_hdr == NULL) + break; + + partid = msg_hdr->partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() " + "received invalid partid=0x%x in message\n", + partid); + } else { + part = &xpc_partitions[partid]; + /* the message is handled even when no reference could be taken */ + part_referenced = xpc_part_ref(part); + xpc_handle_activate_mq_msg_uv(part, msg_hdr, + &wakeup_hb_checker); + if (part_referenced) + xpc_part_deref(part); + } + + gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr); + } + + if (wakeup_hb_checker) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + return IRQ_HANDLED; +} +/* + * Copy the remote gru_message_queue_desc found at gru_mq_desc_gpa into the + * local buffer gru_mq_desc. On success the copy's mq pointer is cleared. + * Returns the xp_remote_memcpy() result. + */ +static enum xp_retval +xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc, + 
unsigned long gru_mq_desc_gpa) +{ + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa, + sizeof(struct gru_message_queue_desc)); + if (ret == xpSuccess) + gru_mq_desc->mq = NULL; + + return ret; +} +/* + * Fill in the activate mq header at the start of msg and send it to part's + * activate mq. + * + * The remote queue descriptor is cached in part_uv under + * cached_activate_gru_mq_desc_mutex; it is (re)fetched whenever + * XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV is clear, including when that flag + * is found clear after a failed send. + * + * Returns xpSuccess, xpNoMemory if the cache buffer cannot be allocated, or + * the error from caching the descriptor or from sending the message. + */ +static enum xp_retval +xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size, + int msg_type) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr = msg; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct gru_message_queue_desc *gru_mq_desc; + unsigned long irq_flags; + enum xp_retval ret; + + DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV); + + msg_hdr->type = msg_type; + msg_hdr->partid = xp_partition_id; + msg_hdr->act_state = part->act_state; + msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies; + + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); +again: + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) { + gru_mq_desc = part_uv->cached_activate_gru_mq_desc; + if (gru_mq_desc == NULL) { + gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (gru_mq_desc == NULL) { + ret = xpNoMemory; + goto done; + } + part_uv->cached_activate_gru_mq_desc = gru_mq_desc; + } + + ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc, + part_uv-> + activate_gru_mq_desc_gpa); + if (ret != xpSuccess) + goto done; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + } + + /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? 
*/ + ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg, + msg_size); + if (ret != xpSuccess) { + smp_rmb(); /* ensure a fresh copy of part_uv->flags */ + /* retry only if the cached descriptor was invalidated meanwhile */ + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) + goto again; + } +done: + mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex); + return ret; +} +/* + * Send an activate mq message to part; on any failure the partition is + * deactivated with the failure code as the reason. + */ +static void +xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg, + size_t msg_size, int msg_type) +{ + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) + XPC_DEACTIVATE_PARTITION(part, ret); +} +/* + * Send an activate mq message on behalf of channel ch. On failure the + * owning partition is deactivated; when irq_flags is non-NULL, ch->lock is + * released around that call and retaken afterwards. + * NOTE(review): a non-NULL irq_flags presumably means the caller holds + * ch->lock -- confirm against the callers. + */ +static void +xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags, + void *msg, size_t msg_size, int msg_type) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + XPC_DEACTIVATE_PARTITION(part, ret); + + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } +} +/* + * Queue act_state_req for part locally and wake the waiter on + * xpc_activate_IRQ_wq, without sending anything to the remote side. + */ +static void +xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + + /* + * !!! Make our side think that the remote partition sent an activate + * !!! mq message our way by doing what the activate IRQ handler would + * !!! do had one really been sent. 
+ */ + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = act_state_req; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} +/* + * Ask the BIOS (x86_64) or SAL (ia64) for the reserved page address and map + * the firmware status to an xp_retval: xpSuccess, xpNeedMoreInfo when more + * passes are requested, otherwise xpBiosError / xpSalError. + */ +static enum xp_retval +xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + +#if defined CONFIG_X86_64 + status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, + (u64 *)len); + if (status == BIOS_STATUS_SUCCESS) + ret = xpSuccess; + else if (status == BIOS_STATUS_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpBiosError; + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + +#else + #error not a supported configuration +#endif + + return ret; +} +/* + * Point xpc_heartbeat_uv at this partition's cached_heartbeat and publish + * its gpa, along with the gpa of the activate mq descriptor, in the + * reserved page rp. Always returns 0. + */ +static int +xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) +{ + xpc_heartbeat_uv = + &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; + rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); + rp->sn.uv.activate_gru_mq_desc_gpa = + uv_gpa(xpc_activate_mq_uv->gru_mq_desc); + return 0; +} + /* The next three heartbeat allow/disallow hooks are empty on uv. */ +static void +xpc_allow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_all_hbs_uv(void) +{ +} + /* Advance the local heartbeat counter. */ +static void +xpc_increment_heartbeat_uv(void) +{ + xpc_heartbeat_uv->value++; +} + /* Advance the heartbeat and flag this partition as offline. */ +static void +xpc_offline_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 1; +} + /* Advance the heartbeat and clear the offline flag. */ +static void +xpc_online_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 0; +} + /* Start the heartbeat at 1 with the offline flag clear. */ +static void +xpc_heartbeat_init_uv(void) +{ + xpc_heartbeat_uv->value = 1; + xpc_heartbeat_uv->offline = 0; +} + +static void 
+xpc_heartbeat_exit_uv(void) +{ + /* leave the heartbeat flagged offline */ + xpc_offline_heartbeat_uv(); +} +/* + * Copy part's remote heartbeat into part_uv->cached_heartbeat and compare + * it with the previously seen value. + * + * Returns the xp_remote_memcpy() error if the copy fails, xpNoHeartbeat when + * the value has not changed and the remote is not flagged offline, otherwise + * xpSuccess after recording the new value in part->last_heartbeat. + */ +static enum xp_retval +xpc_get_remote_heartbeat_uv(struct xpc_partition *part) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), + part_uv->heartbeat_gpa, + sizeof(struct xpc_heartbeat_uv)); + if (ret != xpSuccess) + return ret; + + if (part_uv->cached_heartbeat.value == part->last_heartbeat && + !part_uv->cached_heartbeat.offline) { + + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = part_uv->cached_heartbeat.value; + } + return ret; +} +/* + * Record the remote reserved page details for the partition named by + * remote_rp->SAL_partid, send it an activate request if it is believed to + * be inactive, and queue a local activate request if our side is inactive. + * The nasid argument is not used here. + */ +static void +xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_gpa, int nasid) +{ + short partid = remote_rp->SAL_partid; + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_activate_mq_msg_activate_req_uv msg; + + part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ + part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; + part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; + part->sn.uv.activate_gru_mq_desc_gpa = + remote_rp->sn.uv.activate_gru_mq_desc_gpa; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? 
+ */ + if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { + msg.rp_gpa = uv_gpa(xpc_rsvd_page); + msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; + msg.activate_gru_mq_desc_gpa = + xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); + } + + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + /* Queue a local activate request for part. */ +static void +xpc_request_partition_reactivation_uv(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} +/* + * Send part a deactivate request carrying part->reason, unless the remote + * side is already believed to be deactivating or inactive. + */ +static void +xpc_request_partition_deactivation_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_deactivate_req_uv msg; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING && + part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) { + + msg.reason = part->reason; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV); + } +} + +static void +xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + /* Reset head to an empty fifo and initialize its lock. */ +static void +xpc_init_fifo_uv(struct xpc_fifo_head_uv *head) +{ + head->first = NULL; + head->last = NULL; + spin_lock_init(&head->lock); + head->n_entries = 0; +} +/* + * Remove and return the first entry of the fifo under head->lock, or return + * NULL if the fifo is empty. The removed entry's next pointer is cleared. + */ +static void * +xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head) +{ + unsigned long irq_flags; + struct xpc_fifo_entry_uv *first; + + spin_lock_irqsave(&head->lock, irq_flags); + first = head->first; + if (head->first != NULL) { + head->first = first->next; + /* removing the only entry also empties the tail pointer */ + if (head->first == NULL) + head->last = NULL; + + head->n_entries--; + BUG_ON(head->n_entries < 0); + + first->next = NULL; + } + spin_unlock_irqrestore(&head->lock, irq_flags); + return first; +} + /* Append last to the tail of the fifo under head->lock. */ +static void +xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head, + struct xpc_fifo_entry_uv *last) +{ + 
unsigned long irq_flags; + + last->next = NULL; + spin_lock_irqsave(&head->lock, irq_flags); + if (head->last != NULL) + head->last->next = last; + else + head->first = last; + head->last = last; + head->n_entries++; + spin_unlock_irqrestore(&head->lock, irq_flags); +} + /* Return the fifo's entry count; read without taking head->lock. */ +static int +xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) +{ + return head->n_entries; +} + +/* + * Setup the channel structures that are uv specific. + * + * Initializes the free-slot and received-message fifos of every channel of + * part. Always returns xpSuccess. + */ +static enum xp_retval +xpc_setup_ch_structures_uv(struct xpc_partition *part) +{ + struct xpc_channel_uv *ch_uv; + int ch_number; + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_uv = &part->channels[ch_number].sn.uv; + + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + } + + return xpSuccess; +} + +/* + * Teardown the channel structures that are uv specific. + */ +static void +xpc_teardown_ch_structures_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} +/* + * Send part a sync message and poll, sleeping about 250ms between checks, + * until its remote_act_state is seen as activating or active. Returns + * xpSuccess, or part->reason if our side starts deactivating while waiting. + * Only the header of msg is filled in, by the send path. + */ +static enum xp_retval +xpc_make_first_contact_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + /* + * We send a sync msg to get the remote partition's remote_act_state + * updated to our current act_state which at this point should + * be XPC_P_AS_ACTIVATING. 
+ */ + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV); + + while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) || + (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) { + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} +/* + * Atomically (under part->chctl_lock) fetch and clear all of part's channel + * control flags, returning the flags that were set. + */ +static u64 +xpc_get_chctl_all_flags_uv(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + return chctl.all_flags; +} +/* + * Allocate ch's array of send message slots and place every slot on the + * free list. + * + * Starts at ch->local_nentries slots and retries with one fewer each time + * kzalloc() fails; ch->local_nentries is lowered to the count actually + * obtained. Returns xpSuccess, or xpNoMemory if not even one slot could be + * allocated. + */ +static enum xp_retval +xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_send_msg_slot_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv); + ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->send_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = &ch_uv->send_msg_slots[entry]; + + msg_slot->msg_slot_number = entry; + xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list, + &msg_slot->next); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) + ch->local_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} +/* + * Allocate ch's array of receive message slots, each ch->entry_size bytes, + * and number them. Uses the same shrink-and-retry strategy as the send + * side, starting from ch->remote_nentries. + */ +static enum xp_retval +xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + 
+ for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + nbytes = nentries * ch->entry_size; + ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->recv_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = ch_uv->recv_msg_slots + + entry * ch->entry_size; + + msg_slot->hdr.msg_slot_number = entry; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) + ch->remote_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +/* + * Allocate msg_slots associated with the channel. + * + * Also allocates the buffer used to cache the remote notify mq descriptor. + * Returns xpSuccess, or xpNoMemory if an allocation fails; when only the + * receive slots fail, the send slots are freed and the free list is reset. + */ +static enum xp_retval +xpc_setup_msg_structures_uv(struct xpc_channel *ch) +{ + /* + * Must be an automatic variable: a function-static result would be + * shared by every caller, letting concurrent channel setups overwrite + * each other's return value. + */ + enum xp_retval ret; + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (ch_uv->cached_notify_gru_mq_desc == NULL) + return xpNoMemory; + + ret = xpc_allocate_send_msg_slot_uv(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_recv_msg_slot_uv(ch); + if (ret != xpSuccess) { + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + } + } + return ret; +} + +/* + * Free up msg_slots and clear other stuff that were setup for the specified + * channel. 
+ */ +static void +xpc_teardown_msg_structures_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + /* debug builds check that ch->lock is held on entry */ + DBUG_ON(!spin_is_locked(&ch->lock)); + + kfree(ch_uv->cached_notify_gru_mq_desc); + ch_uv->cached_notify_gru_mq_desc = NULL; + /* the slot arrays are only freed once setup had completed */ + if (ch->flags & XPC_C_SETUP) { + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + kfree(ch_uv->recv_msg_slots); + } +} + /* Send a close request for ch, carrying the channel number and ch->reason. */ +static void +xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closerequest_uv msg; + + msg.ch_number = ch->number; + msg.reason = ch->reason; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV); +} + /* Send a close reply for ch. */ +static void +xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closereply_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV); +} + /* Send an open request for ch with its entry size and local entry count. */ +static void +xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openrequest_uv msg; + + msg.ch_number = ch->number; + msg.entry_size = ch->entry_size; + msg.local_nentries = ch->local_nentries; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV); +} +/* + * Send an open reply for ch with both entry counts and the gpa of the local + * notify mq descriptor. + */ +static void +xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openreply_uv msg; + + msg.ch_number = ch->number; + msg.local_nentries = ch->local_nentries; + msg.remote_nentries = ch->remote_nentries; + msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc); + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); +} + /* Send an open-complete message for ch. */ +static void +xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + 
struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); +} + +static void +xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch, + unsigned long gru_mq_desc_gpa) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL); + return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc, + gru_mq_desc_gpa); +} + +static void +xpc_indicate_partition_engaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV); +} + +static void +xpc_indicate_partition_disengaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV); +} + +static void +xpc_assume_partition_disengaged_uv(short partid) +{ + struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv; + unsigned long irq_flags; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); +} + +static int +xpc_partition_engaged_uv(short partid) +{ + return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0; +} + +static int +xpc_any_partition_engaged_uv(void) +{ + struct xpc_partition_uv *part_uv; + short partid; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0) + 
return 1; + } + return 0; +} + +static enum xp_retval +xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags, + struct xpc_send_msg_slot_uv **address_of_msg_slot) +{ + enum xp_retval ret; + struct xpc_send_msg_slot_uv *msg_slot; + struct xpc_fifo_entry_uv *entry; + + while (1) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list); + if (entry != NULL) + break; + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next); + *address_of_msg_slot = msg_slot; + return xpSuccess; +} + +static void +xpc_free_msg_slot_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot) +{ + xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next); + + /* wakeup anyone waiting for a free msg slot */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); +} + +static void +xpc_notify_sender_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot, + enum xp_retval reason) +{ + xpc_notify_func func = msg_slot->func; + + if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) { + + atomic_dec(&ch->n_to_notify); + + dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + + func(reason, ch->partid, ch->number, msg_slot->key); + + dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + } +} + +static void +xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry = msg->hdr.msg_slot_number % ch->local_nentries; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + + BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number); + 
msg_slot->msg_slot_number += ch->local_nentries; + + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered); + + xpc_free_msg_slot_uv(ch, msg_slot); +} + +static void +xpc_handle_notify_mq_msg_uv(struct xpc_partition *part, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_channel *ch; + struct xpc_channel_uv *ch_uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int ch_number = msg->hdr.ch_number; + + if (unlikely(ch_number >= part->nchannels)) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid " + "channel number=0x%x in message from partid=%d\n", + ch_number, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadChannelNumber; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + return; + } + + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* see if we're really dealing with an ACK for a previously sent msg */ + if (msg->hdr.size == 0) { + xpc_handle_notify_mq_ack_uv(ch, msg); + xpc_msgqueue_deref(ch); + return; + } + + /* we're dealing with a normal message sent via the notify_mq */ + ch_uv = &ch->sn.uv; + + msg_slot = ch_uv->recv_msg_slots + + (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size; + + BUG_ON(msg_slot->hdr.size != 0); + + memcpy(msg_slot, msg, msg->hdr.size); + + xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { + /* + * If there is an existing idle kthread get it to deliver + * the payload, otherwise we'll have to get the channel mgr + * for this partition to 
create a kthread to do the delivery. + */ + if (atomic_read(&ch->kthreads_idle) > 0) + wake_up_nr(&ch->idle_wq, 1); + else + xpc_send_chctl_local_msgrequest_uv(part, ch->number); + } + xpc_msgqueue_deref(ch); +} + +static irqreturn_t +xpc_handle_notify_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_notify_mq_msg_uv *msg; + short partid; + struct xpc_partition *part; + + while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) != + NULL) { + + partid = msg->hdr.partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received " + "invalid partid=0x%x in message\n", partid); + } else { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + xpc_handle_notify_mq_msg_uv(part, msg); + xpc_part_deref(part); + } + } + + gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg); + } + + return IRQ_HANDLED; +} + +static int +xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch) +{ + return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list); +} + +static void +xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int ndeliverable_payloads; + + xpc_msgqueue_ref(ch); + + ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch); + + if (ndeliverable_payloads > 0 && + (ch->flags & XPC_C_CONNECTED) && + (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) { + + xpc_activate_kthreads(ch, ndeliverable_payloads); + } + + xpc_msgqueue_deref(ch); +} + +static enum xp_retval +xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_send_msg_slot_uv *msg_slot = NULL; + struct xpc_notify_mq_msg_uv *msg; + u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV]; + size_t msg_size; + + DBUG_ON(notify_type != XPC_N_CALL); + + msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size; + if (msg_size > ch->entry_size) + return 
xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot); + if (ret != xpSuccess) + goto out_1; + + if (func != NULL) { + atomic_inc(&ch->n_to_notify); + + msg_slot->key = key; + smp_wmb(); /* a non-NULL func must hit memory after the key */ + msg_slot->func = func; + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_2; + } + } + + msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer; + msg->hdr.partid = xp_partition_id; + msg->hdr.ch_number = ch->number; + msg->hdr.size = msg_size; + msg->hdr.msg_slot_number = msg_slot->msg_slot_number; + memcpy(&msg->payload, payload, payload_size); + + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + msg_size); + if (ret == xpSuccess) + goto out_1; + + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +out_2: + if (func != NULL) { + /* + * Try to NULL the msg_slot's func field. If we fail, then + * xpc_notify_senders_of_disconnect_uv() beat us to it, in which + * case we need to pretend we succeeded to send the message + * since the user will get a callout for the disconnect error + * by xpc_notify_senders_of_disconnect_uv(), and to also get an + * error returned here will confuse them. Additionally, since + * in this case the channel is being disconnected we don't need + * to put the the msg_slot back on the free list. + */ + if (cmpxchg(&msg_slot->func, func, NULL) != func) { + ret = xpSuccess; + goto out_1; + } + + msg_slot->key = NULL; + atomic_dec(&ch->n_to_notify); + } + xpc_free_msg_slot_uv(ch, msg_slot); +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Tell the callers of xpc_send_notify() that the status of their payloads + * is unknown because the channel is now disconnecting. 
 *
 * We don't worry about putting these msg_slots on the free list since the
 * msg_slots themselves are about to be kfree'd.
 */
static void
xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
{
	struct xpc_send_msg_slot_uv *msg_slot;
	int entry;

	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));

	for (entry = 0; entry < ch->local_nentries; entry++) {

		/* stop scanning once nobody is left to notify */
		if (atomic_read(&ch->n_to_notify) == 0)
			break;

		msg_slot = &ch->sn.uv.send_msg_slots[entry];
		if (msg_slot->func != NULL)
			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
	}
}

/*
 * Get the next deliverable message's payload.
 *
 * Returns NULL if the channel is disconnecting or recv_msg_list is empty.
 */
static void *
xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
{
	struct xpc_fifo_entry_uv *entry;
	struct xpc_notify_mq_msg_uv *msg;
	void *payload = NULL;

	if (!(ch->flags & XPC_C_DISCONNECTING)) {
		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
		if (entry != NULL) {
			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
					   hdr.u.next);
			payload = &msg->payload;
		}
	}
	return payload;
}

/*
 * The payload has been consumed: reuse its message header to send an ACK
 * (header only, size 0) back to the sender. A failed send deactivates the
 * partition.
 */
static void
xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
{
	struct xpc_notify_mq_msg_uv *msg;
	enum xp_retval ret;

	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);

	/* return an ACK to the sender of this message */

	msg->hdr.partid = xp_partition_id;
	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */

	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
			       sizeof(struct xpc_notify_mq_msghdr_uv));
	if (ret != xpSuccess)
		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
}

/*
 * Table of the UV implementations of the arch operations; xpc_init_uv()
 * copies it into xpc_arch_ops.
 */
static struct xpc_arch_operations xpc_arch_ops_uv = {
	.setup_partitions = xpc_setup_partitions_uv,
	.teardown_partitions = xpc_teardown_partitions_uv,
	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
	.setup_rsvd_page = xpc_setup_rsvd_page_uv,

	.allow_hb = xpc_allow_hb_uv,
	.disallow_hb = xpc_disallow_hb_uv,
	.disallow_all_hbs = xpc_disallow_all_hbs_uv,
	.increment_heartbeat = xpc_increment_heartbeat_uv,
	.offline_heartbeat = xpc_offline_heartbeat_uv,
	.online_heartbeat = xpc_online_heartbeat_uv,
	.heartbeat_init = xpc_heartbeat_init_uv,
	.heartbeat_exit = xpc_heartbeat_exit_uv,
	.get_remote_heartbeat = xpc_get_remote_heartbeat_uv,

	.request_partition_activation =
		xpc_request_partition_activation_uv,
	.request_partition_reactivation =
		xpc_request_partition_reactivation_uv,
	.request_partition_deactivation =
		xpc_request_partition_deactivation_uv,
	.cancel_partition_deactivation_request =
		xpc_cancel_partition_deactivation_request_uv,

	.setup_ch_structures = xpc_setup_ch_structures_uv,
	.teardown_ch_structures = xpc_teardown_ch_structures_uv,

	.make_first_contact = xpc_make_first_contact_uv,

	.get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
	.send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
	.send_chctl_closereply = xpc_send_chctl_closereply_uv,
	.send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
	.send_chctl_openreply = xpc_send_chctl_openreply_uv,
	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,

	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,

	.setup_msg_structures = xpc_setup_msg_structures_uv,
	.teardown_msg_structures = xpc_teardown_msg_structures_uv,

	.indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
	.assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
	.partition_engaged = xpc_partition_engaged_uv,
	.any_partition_engaged = xpc_any_partition_engaged_uv,

	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
	.send_payload = xpc_send_payload_uv,
	.get_deliverable_payload = xpc_get_deliverable_payload_uv,
	.received_payload = xpc_received_payload_uv,
	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
};

/*
 * Install the UV arch operations and create the activate and notify GRU
 * message queues.
 *
 * Returns 0 on success, -E2BIG if the notify mq message header does not fit
 * in XPC_MSG_HDR_MAX_SIZE, or the error encoded in the pointer returned by
 * xpc_create_gru_mq_uv().
 */
int
xpc_init_uv(void)
{
	xpc_arch_ops = xpc_arch_ops_uv;

	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
			XPC_MSG_HDR_MAX_SIZE);
		return -E2BIG;
	}

	xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
						  XPC_ACTIVATE_IRQ_NAME,
						  xpc_handle_activate_IRQ_uv);
	if (IS_ERR(xpc_activate_mq_uv))
		return PTR_ERR(xpc_activate_mq_uv);

	xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
						XPC_NOTIFY_IRQ_NAME,
						xpc_handle_notify_IRQ_uv);
	if (IS_ERR(xpc_notify_mq_uv)) {
		/* undo the activate mq created above */
		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
		return PTR_ERR(xpc_notify_mq_uv);
	}

	return 0;
}

/*
 * Destroy the two message queues created by xpc_init_uv(), in reverse order
 * of creation.
 */
void
xpc_exit_uv(void)
{
	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
}
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c new file mode 100644 index 00000000000..16f0abda142 --- /dev/null +++ b/drivers/misc/sgi-xp/xpnet.c @@ -0,0 +1,606 @@
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
 */

/*
 * Cross Partition Network Interface (XPNET) support
 *
 * XPNET provides a virtual network layered on top of the Cross
 * Partition communication layer.
 *
 * XPNET provides direct point-to-point and broadcast-like support
 * for an ethernet-like device. The ethernet broadcast medium is
 * replaced with a point-to-point message structure which passes
 * pointers to a DMA-capable block that a remote partition should
 * retrieve and pass to the upper level networking layer.
 *
 */

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include "xp.h"

/*
 * The message payload transferred by XPC.
 *
 * buf_pa is the physical address where the DMA should pull from.
 *
 * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
 * cacheline boundary. To accomplish this, we record the number of
 * bytes from the beginning of the first cacheline to the first useful
 * byte of the skb (leadin_ignore) and the number of bytes from the
 * last useful byte of the skb to the end of the last cacheline
 * (tailout_ignore).
 *
 * size is the number of bytes to transfer, which includes the skb->len
 * (useful bytes of the sender's skb) plus the leadin and tailout.
 */
struct xpnet_message {
	u16 version;		/* Version for this message */
	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
	u32 magic;		/* Special number indicating this is xpnet */
	unsigned long buf_pa;	/* phys address of buffer to retrieve */
	u32 size;		/* #of bytes in buffer */
	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
	u8 tailout_ignore;	/* #of bytes to ignore at the end */
	unsigned char data;	/* body of small packets */
};

/*
 * Determine the size of our message, the cacheline aligned size,
 * and then the number of messages we will request from XPC.
 *
 * XPC expects each message to exist in an individual cacheline.
 */
#define XPNET_MSG_SIZE		XPC_MSG_PAYLOAD_MAX_SIZE
/* bytes available for embedded packet data, starting at the data member */
#define XPNET_MSG_DATA_MAX	\
		(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPC_MSG_MAX_SIZE)

#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)

/*
 * Version number of XPNET implementation. XPNET can always talk to versions
 * with the same major #, and never talk to versions with a different major #.
+ */ +#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) +#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) + +#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */ +#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */ +#define XPNET_MAGIC 0x88786984 /* "XNET" */ + +#define XPNET_VALID_MSG(_m) \ + ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ + && (msg->magic == XPNET_MAGIC)) + +#define XPNET_DEVICE_NAME "xp0" + +/* + * When messages are queued with xpc_send_notify, a kmalloc'd buffer + * of the following type is passed as a notification cookie. When the + * notification function is called, we use the cookie to decide + * whether all outstanding message sends have completed. The skb can + * then be released. + */ +struct xpnet_pending_msg { + struct sk_buff *skb; + atomic_t use_count; +}; + +struct net_device *xpnet_device; + +/* + * When we are notified of other partitions activating, we add them to + * our bitmask of partitions to which we broadcast. + */ +static unsigned long *xpnet_broadcast_partitions; +/* protect above */ +static DEFINE_SPINLOCK(xpnet_broadcast_lock); + +/* + * Since the Block Transfer Engine (BTE) is being used for the transfer + * and it relies upon cache-line size transfers, we need to reserve at + * least one cache-line for head and tail alignment. The BTE is + * limited to 8MB transfers. + * + * Testing has shown that changing MTU to greater than 64KB has no effect + * on TCP as the two sides negotiate a Max Segment Size that is limited + * to 64K. Other protocols May use packets greater than this, but for + * now, the default is 64KB. + */ +#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 32KB has been determined to be the ideal */ +#define XPNET_DEF_MTU (0x8000UL) + +/* + * The partid is encapsulated in the MAC address beginning in the following + * octet and it consists of two octets. 
+ */ +#define XPNET_PARTID_OCTET 2 + +/* Define the XPNET debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpnet_dbg_name = { + .name = "xpnet" +}; + +struct device xpnet_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xpnet_dbg_name +}; + +struct device *xpnet = &xpnet_dbg_subname; + +/* + * Packet was recevied by XPC and forwarded to us. + */ +static void +xpnet_receive(short partid, int channel, struct xpnet_message *msg) +{ + struct sk_buff *skb; + void *dst; + enum xp_retval ret; + + if (!XPNET_VALID_MSG(msg)) { + /* + * Packet with a different XPC version. Ignore. + */ + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + /* reserve an extra cache line */ + skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); + if (!skb) { + dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", + msg->size + L1_CACHE_BYTES); + + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + + /* + * The allocated skb has some reserved space. + * In order to use xp_remote_memcpy(), we need to get the + * skb->data pointer moved forward. + */ + skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & + (L1_CACHE_BYTES - 1)) + + msg->leadin_ignore)); + + /* + * Update the tail pointer to indicate data actually + * transferred. + */ + skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); + + /* + * Move the data over from the other side. + */ + if ((XPNET_VERSION_MINOR(msg->version) == 1) && + (msg->embedded_bytes != 0)) { + dev_dbg(xpnet, "copying embedded message. 
memcpy(0x%p, 0x%p, " + "%lu)\n", skb->data, &msg->data, + (size_t)msg->embedded_bytes); + + skb_copy_to_linear_data(skb, &msg->data, + (size_t)msg->embedded_bytes); + } else { + dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" + "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst, + (void *)msg->buf_pa, msg->size); + + ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size); + if (ret != xpSuccess) { + /* + * !!! Need better way of cleaning skb. Currently skb + * !!! appears in_use and we can't just call + * !!! dev_kfree_skb. + */ + dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) " + "returned error=0x%x\n", dst, + (void *)msg->buf_pa, msg->size, ret); + + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + } + + dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *)skb->head, + (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + skb->protocol = eth_type_trans(skb, xpnet_device); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + dev_dbg(xpnet, "passing skb to network layer\n" + "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", + (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), + skb_end_pointer(skb), skb->len); + + xpnet_device->stats.rx_packets++; + xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN; + + netif_rx_ni(skb); + xpc_received(partid, channel, (void *)msg); +} + +/* + * This is the handler which XPC calls during any sort of change in + * state or message reception on a connection. 
+ */ +static void +xpnet_connection_activity(enum xp_retval reason, short partid, int channel, + void *data, void *key) +{ + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(channel != XPC_NET_CHANNEL); + + switch (reason) { + case xpMsgReceived: /* message received */ + DBUG_ON(data == NULL); + + xpnet_receive(partid, channel, (struct xpnet_message *)data); + break; + + case xpConnected: /* connection completed to a partition */ + spin_lock_bh(&xpnet_broadcast_lock); + __set_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + netif_carrier_on(xpnet_device); + + dev_dbg(xpnet, "%s connected to partition %d\n", + xpnet_device->name, partid); + break; + + default: + spin_lock_bh(&xpnet_broadcast_lock); + __clear_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions, + xp_max_npartitions)) { + netif_carrier_off(xpnet_device); + } + + dev_dbg(xpnet, "%s disconnected from partition %d\n", + xpnet_device->name, partid); + break; + } +} + +static int +xpnet_dev_open(struct net_device *dev) +{ + enum xp_retval ret; + + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " + "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + (unsigned long)XPNET_MSG_SIZE, + (unsigned long)XPNET_MSG_NENTRIES, + (unsigned long)XPNET_MAX_KTHREADS, + (unsigned long)XPNET_MAX_IDLE_KTHREADS); + + ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, + XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); + if (ret != xpSuccess) { + dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " + "ret=%d\n", dev->name, ret); + + return -ENOMEM; + } + + dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); + + return 0; +} + +static int +xpnet_dev_stop(struct net_device *dev) +{ + xpc_disconnect(XPC_NET_CHANNEL); + + dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); + + 
return 0; +} + +static int +xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) +{ + /* 68 comes from min TCP+IP+MAC header */ + if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { + dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " + "between 68 and %ld\n", dev->name, new_mtu, + XPNET_MAX_MTU); + return -EINVAL; + } + + dev->mtu = new_mtu; + dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); + return 0; +} + +/* + * Notification that the other end has received the message and + * DMA'd the skb information. At this point, they are done with + * our side. When all recipients are done processing, we + * release the skb and then release our pending message structure. + */ +static void +xpnet_send_completed(enum xp_retval reason, short partid, int channel, + void *__qm) +{ + struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm; + + DBUG_ON(queued_msg == NULL); + + dev_dbg(xpnet, "message to %d notified with reason %d\n", + partid, reason); + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "all acks for skb->head=-x%p\n", + (void *)queued_msg->skb->head); + + dev_kfree_skb_any(queued_msg->skb); + kfree(queued_msg); + } +} + +static void +xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg, + u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid) +{ + u8 msg_buffer[XPNET_MSG_SIZE]; + struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer; + u16 msg_size = sizeof(struct xpnet_message); + enum xp_retval ret; + + msg->embedded_bytes = embedded_bytes; + if (unlikely(embedded_bytes != 0)) { + msg->version = XPNET_VERSION_EMBED; + dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", + &msg->data, skb->data, (size_t)embedded_bytes); + skb_copy_from_linear_data(skb, &msg->data, + (size_t)embedded_bytes); + msg_size += embedded_bytes - 1; + } else { + msg->version = XPNET_VERSION; + } + msg->magic = XPNET_MAGIC; + msg->size = end_addr - start_addr; + msg->leadin_ignore 
= (u64)skb->data - start_addr; + msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); + msg->buf_pa = xp_pa((void *)start_addr); + + dev_dbg(xpnet, "sending XPC message to %d:%d\n" + "msg->buf_pa=0x%lx, msg->size=%u, " + "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", + dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + atomic_inc(&queued_msg->use_count); + + ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg, + msg_size, xpnet_send_completed, queued_msg); + if (unlikely(ret != xpSuccess)) + atomic_dec(&queued_msg->use_count); +} + +/* + * Network layer has formatted a packet (skb) and is ready to place it + * "on the wire". Prepare and send an xpnet_message to all partitions + * which have connected with us and are targets of this packet. + * + * MAC-NOTE: For the XPNET driver, the MAC address contains the + * destination partid. If the destination partid octets are 0xffff, + * this packet is to be broadcast to all connected partitions. + */ +static int +xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xpnet_pending_msg *queued_msg; + u64 start_addr, end_addr; + short dest_partid; + u16 embedded_bytes = 0; + + dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *)skb->head, + (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + if (skb->data[0] == 0x33) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; /* nothing needed to be done */ + } + + /* + * The xpnet_pending_msg tracks how many outstanding + * xpc_send_notifies are relying on this skb. When none + * remain, release the skb. 
+ */ + queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); + if (queued_msg == NULL) { + dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " + "packet\n", sizeof(struct xpnet_pending_msg)); + + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* get the beginning of the first cacheline and end of last */ + start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); + + /* calculate how many bytes to embed in the XPC message */ + if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { + /* skb->data does fit so embed */ + embedded_bytes = skb->len; + } + + /* + * Since the send occurs asynchronously, we set the count to one + * and begin sending. Any sends that happen to complete before + * we are done sending will not free the skb. We will be left + * with that task during exit. This also handles the case of + * a packet destined for a partition which is no longer up. + */ + atomic_set(&queued_msg->use_count, 1); + queued_msg->skb = skb; + + if (skb->data[0] == 0xff) { + /* we are being asked to broadcast to all partitions */ + for_each_bit(dest_partid, xpnet_broadcast_partitions, + xp_max_npartitions) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } else { + dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1]; + dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8; + + if (dest_partid >= 0 && + dest_partid < xp_max_npartitions && + test_bit(dest_partid, xpnet_broadcast_partitions) != 0) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_kfree_skb(skb); + kfree(queued_msg); + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + return NETDEV_TX_OK; +} + +/* + * Deal with transmit timeouts coming from the network layer. 
 */
static void
xpnet_dev_tx_timeout(struct net_device *dev)
{
	/* nothing is retried here; the timeout is only counted as an error */
	dev->stats.tx_errors++;
}

/* net_device callbacks for the xp0 device */
static const struct net_device_ops xpnet_netdev_ops = {
	.ndo_open		= xpnet_dev_open,
	.ndo_stop		= xpnet_dev_stop,
	.ndo_start_xmit		= xpnet_dev_hard_start_xmit,
	.ndo_change_mtu		= xpnet_dev_change_mtu,
	.ndo_tx_timeout		= xpnet_dev_tx_timeout,
	.ndo_set_mac_address 	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
};

/*
 * Module init: allocate the broadcast partition bitmap, then allocate,
 * configure and register the xp0 net device.
 *
 * Returns -ENODEV when running on neither shub nor uv hardware, -ENOMEM on
 * allocation failure, or the result of register_netdev(). Everything
 * allocated here is freed again on failure.
 */
static int __init
xpnet_init(void)
{
	int result;

	if (!is_shub() && !is_uv())
		return -ENODEV;

	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);

	xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
					     sizeof(long), GFP_KERNEL);
	if (xpnet_broadcast_partitions == NULL)
		return -ENOMEM;

	/*
	 * use ether_setup() to init the majority of our device
	 * structure and then override the necessary pieces.
	 */
	xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, ether_setup);
	if (xpnet_device == NULL) {
		kfree(xpnet_broadcast_partitions);
		return -ENOMEM;
	}

	/* the carrier is turned on by xpnet_connection_activity() */
	netif_carrier_off(xpnet_device);

	xpnet_device->netdev_ops = &xpnet_netdev_ops;
	xpnet_device->mtu = XPNET_DEF_MTU;

	/*
	 * Multicast assumes the LSB of the first octet is set for multicast
	 * MAC addresses.  We chose the first octet of the MAC to be unlikely
	 * to collide with any vendor's officially issued MAC.
	 */
	xpnet_device->dev_addr[0] = 0x02;     /* locally administered, no OUI */

	/* low byte of the partid in the second octet, high byte in the first */
	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);

	/*
	 * ether_setup() sets this to a multicast device.  We are
	 * really not supporting multicast at this time.
	 */
	xpnet_device->flags &= ~IFF_MULTICAST;

	/*
	 * No need to checksum as it is a DMA transfer.  The BTE will
	 * report an error if the data is not retrievable and the
	 * packet will be dropped.
	 */
	xpnet_device->features = NETIF_F_NO_CSUM;

	result = register_netdev(xpnet_device);
	if (result != 0) {
		free_netdev(xpnet_device);
		kfree(xpnet_broadcast_partitions);
	}

	return result;
}

module_init(xpnet_init);

/*
 * Module exit: unregister and free the net device, then free the broadcast
 * partition bitmap.
 */
static void __exit
xpnet_exit(void)
{
	dev_info(xpnet, "unregistering network device %s\n",
		 xpnet_device[0].name);

	unregister_netdev(xpnet_device);
	free_netdev(xpnet_device);
	kfree(xpnet_broadcast_partitions);
}

module_exit(xpnet_exit);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ti_dac7512.c b/drivers/misc/ti_dac7512.c new file mode 100644 index 00000000000..d3f229a3a77 --- /dev/null +++ b/drivers/misc/ti_dac7512.c @@ -0,0 +1,101 @@
/*
 * dac7512.c - Linux kernel module for
 * Texas Instruments DAC7512
 *
 * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/spi/spi.h> + +#define DAC7512_DRV_NAME "dac7512" +#define DRIVER_VERSION "1.0" + +static ssize_t dac7512_store_val(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct spi_device *spi = to_spi_device(dev); + unsigned char tmp[2]; + unsigned long val; + + if (strict_strtoul(buf, 10, &val) < 0) + return -EINVAL; + + tmp[0] = val >> 8; + tmp[1] = val & 0xff; + spi_write(spi, tmp, sizeof(tmp)); + return count; +} + +static DEVICE_ATTR(value, S_IWUSR, NULL, dac7512_store_val); + +static struct attribute *dac7512_attributes[] = { + &dev_attr_value.attr, + NULL +}; + +static const struct attribute_group dac7512_attr_group = { + .attrs = dac7512_attributes, +}; + +static int __devinit dac7512_probe(struct spi_device *spi) +{ + int ret; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + ret = spi_setup(spi); + if (ret < 0) + return ret; + + return sysfs_create_group(&spi->dev.kobj, &dac7512_attr_group); +} + +static int __devexit dac7512_remove(struct spi_device *spi) +{ + sysfs_remove_group(&spi->dev.kobj, &dac7512_attr_group); + return 0; +} + +static struct spi_driver dac7512_driver = { + .driver = { + .name = DAC7512_DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = dac7512_probe, + .remove = __devexit_p(dac7512_remove), +}; + +static int __init dac7512_init(void) +{ + return spi_register_driver(&dac7512_driver); +} + +static void __exit dac7512_exit(void) +{ + spi_unregister_driver(&dac7512_driver); +} + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("DAC7512 16-bit DAC"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRIVER_VERSION); + +module_init(dac7512_init); +module_exit(dac7512_exit); diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c new file mode 100644 index 00000000000..a6ef18259da --- /dev/null +++ b/drivers/misc/tifm_7xx1.c @@ -0,0 +1,453 @@ +/* + * tifm_7xx1.c - TI FlashMedia driver + * + * 
Copyright (C) 2006 Alex Dubov <oakad@yahoo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/tifm.h> +#include <linux/dma-mapping.h> + +#define DRIVER_NAME "tifm_7xx1" +#define DRIVER_VERSION "0.8" + +#define TIFM_IRQ_ENABLE 0x80000000 +#define TIFM_IRQ_SOCKMASK(x) (x) +#define TIFM_IRQ_CARDMASK(x) ((x) << 8) +#define TIFM_IRQ_FIFOMASK(x) ((x) << 16) +#define TIFM_IRQ_SETALL 0xffffffff + +static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ +} + +static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + unsigned long flags; + + spin_lock_irqsave(&fm->lock, flags); + fm->socket_change_set |= 1 << sock->socket_id; + tifm_queue_work(&fm->media_switcher); + spin_unlock_irqrestore(&fm->lock, flags); +} + +static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id) +{ + struct tifm_adapter *fm = dev_id; + struct tifm_dev *sock; + unsigned int irq_status, cnt; + + spin_lock(&fm->lock); + irq_status = readl(fm->addr + FM_INTERRUPT_STATUS); + if (irq_status == 0 || irq_status == (~0)) { + spin_unlock(&fm->lock); + return IRQ_NONE; + } + + if (irq_status & TIFM_IRQ_ENABLE) { + writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + sock = fm->sockets[cnt]; + if (sock) { + if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1)) + sock->data_event(sock); + if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1)) + sock->card_event(sock); + } + } + + fm->socket_change_set |= irq_status + & ((1 << fm->num_sockets) - 1); + } + writel(irq_status, fm->addr + FM_INTERRUPT_STATUS); + + if (fm->finish_me) + complete_all(fm->finish_me); + else if (!fm->socket_change_set) + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + else + tifm_queue_work(&fm->media_switcher); + + spin_unlock(&fm->lock); + return 
IRQ_HANDLED; +} + +static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr) +{ + unsigned int s_state; + int cnt; + + writel(0x0e00, sock_addr + SOCK_CONTROL); + + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if (!(TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + s_state = readl(sock_addr + SOCK_PRESENT_STATE); + if (!(TIFM_SOCK_STATE_OCCUPIED & s_state)) + return 0; + + writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED, + sock_addr + SOCK_CONTROL); + + /* xd needs some extra time before power on */ + if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7) + == TIFM_TYPE_XD) + msleep(40); + + writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00, + sock_addr + SOCK_CONTROL); + /* wait for power to stabilize */ + msleep(20); + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if ((TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED), + sock_addr + SOCK_CONTROL); + + return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7; +} + +inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr) +{ + writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL), + sock_addr + SOCK_CONTROL); +} + +inline static char __iomem * +tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num) +{ + return base_addr + ((sock_num + 1) << 10); +} + +static void tifm_7xx1_switch_media(struct work_struct *work) +{ + struct tifm_adapter *fm = container_of(work, struct tifm_adapter, + media_switcher); + struct tifm_dev *sock; + char __iomem *sock_addr; + unsigned long flags; + unsigned char media_id; + unsigned int socket_change_set, cnt; + + spin_lock_irqsave(&fm->lock, flags); + socket_change_set = fm->socket_change_set; + fm->socket_change_set = 0; + + dev_dbg(fm->dev.parent, "checking media set %x\n", + socket_change_set); + + if (!socket_change_set) { + spin_unlock_irqrestore(&fm->lock, flags); + return; 
+ } + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (!(socket_change_set & (1 << cnt))) + continue; + sock = fm->sockets[cnt]; + if (sock) { + printk(KERN_INFO + "%s : demand removing card from socket %u:%u\n", + dev_name(&fm->dev), fm->id, cnt); + fm->sockets[cnt] = NULL; + sock_addr = sock->addr; + spin_unlock_irqrestore(&fm->lock, flags); + device_unregister(&sock->dev); + spin_lock_irqsave(&fm->lock, flags); + tifm_7xx1_sock_power_off(sock_addr); + writel(0x0e00, sock_addr + SOCK_CONTROL); + } + + spin_unlock_irqrestore(&fm->lock, flags); + + media_id = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, cnt)); + + // tifm_alloc_device will check if media_id is valid + sock = tifm_alloc_device(fm, cnt, media_id); + if (sock) { + sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt); + + if (!device_register(&sock->dev)) { + spin_lock_irqsave(&fm->lock, flags); + if (!fm->sockets[cnt]) { + fm->sockets[cnt] = sock; + sock = NULL; + } + spin_unlock_irqrestore(&fm->lock, flags); + } + if (sock) + tifm_free_device(&sock->dev); + } + spin_lock_irqsave(&fm->lock, flags); + } + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_SET_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_unlock_irqrestore(&fm->lock, flags); +} + +#ifdef CONFIG_PM + +static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + dev_dbg(&dev->dev, "suspending host\n"); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (fm->sockets[cnt]) + tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr); + } + + pci_save_state(dev); + pci_enable_wake(dev, pci_choose_state(dev, state), 0); + pci_disable_device(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + return 0; +} + +static int 
tifm_7xx1_resume(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int rc; + unsigned int good_sockets = 0, bad_sockets = 0; + unsigned long flags; + unsigned char new_ids[fm->num_sockets]; + DECLARE_COMPLETION_ONSTACK(finish_resume); + + pci_set_power_state(dev, PCI_D0); + pci_restore_state(dev); + rc = pci_enable_device(dev); + if (rc) + return rc; + pci_set_master(dev); + + dev_dbg(&dev->dev, "resuming host\n"); + + for (rc = 0; rc < fm->num_sockets; rc++) + new_ids[rc] = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, rc)); + spin_lock_irqsave(&fm->lock, flags); + for (rc = 0; rc < fm->num_sockets; rc++) { + if (fm->sockets[rc]) { + if (fm->sockets[rc]->type == new_ids[rc]) + good_sockets |= 1 << rc; + else + bad_sockets |= 1 << rc; + } + } + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n", + good_sockets, bad_sockets); + + fm->socket_change_set = 0; + if (good_sockets) { + fm->finish_me = &finish_resume; + spin_unlock_irqrestore(&fm->lock, flags); + rc = wait_for_completion_timeout(&finish_resume, HZ); + dev_dbg(&dev->dev, "wait returned %d\n", rc); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_lock_irqsave(&fm->lock, flags); + fm->finish_me = NULL; + fm->socket_change_set ^= good_sockets & fm->socket_change_set; + } + + fm->socket_change_set |= bad_sockets; + if (fm->socket_change_set) + tifm_queue_work(&fm->media_switcher); + + spin_unlock_irqrestore(&fm->lock, flags); + writel(TIFM_IRQ_ENABLE, + fm->addr + FM_SET_INTERRUPT_ENABLE); + + return 0; +} + +#else + +#define tifm_7xx1_suspend NULL +#define tifm_7xx1_resume NULL + +#endif /* CONFIG_PM */ + +static int tifm_7xx1_dummy_has_ms_pif(struct 
tifm_adapter *fm, + struct tifm_dev *sock) +{ + return 0; +} + +static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + if (((fm->num_sockets == 4) && (sock->socket_id == 2)) + || ((fm->num_sockets == 2) && (sock->socket_id == 0))) + return 1; + + return 0; +} + +static int tifm_7xx1_probe(struct pci_dev *dev, + const struct pci_device_id *dev_id) +{ + struct tifm_adapter *fm; + int pci_dev_busy = 0; + int rc; + + rc = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + if (rc) + return rc; + + rc = pci_enable_device(dev); + if (rc) + return rc; + + pci_set_master(dev); + + rc = pci_request_regions(dev, DRIVER_NAME); + if (rc) { + pci_dev_busy = 1; + goto err_out; + } + + pci_intx(dev, 1); + + fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM + ? 4 : 2, &dev->dev); + if (!fm) { + rc = -ENOMEM; + goto err_out_int; + } + + INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media); + fm->eject = tifm_7xx1_eject; + fm->has_ms_pif = tifm_7xx1_has_ms_pif; + pci_set_drvdata(dev, fm); + + fm->addr = pci_ioremap_bar(dev, 0); + if (!fm->addr) + goto err_out_free; + + rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm); + if (rc) + goto err_out_unmap; + + rc = tifm_add_adapter(fm); + if (rc) + goto err_out_irq; + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + return 0; + +err_out_irq: + free_irq(dev->irq, fm); +err_out_unmap: + iounmap(fm->addr); +err_out_free: + pci_set_drvdata(dev, NULL); + tifm_free_adapter(fm); +err_out_int: + pci_intx(dev, 0); + pci_release_regions(dev); +err_out: + if (!pci_dev_busy) + pci_disable_device(dev); + return rc; +} + +static void tifm_7xx1_remove(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + fm->eject = tifm_7xx1_dummy_eject; + fm->has_ms_pif = 
tifm_7xx1_dummy_has_ms_pif; + writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + mmiowb(); + free_irq(dev->irq, fm); + + tifm_remove_adapter(fm); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) + tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt)); + + pci_set_drvdata(dev, NULL); + + iounmap(fm->addr); + pci_intx(dev, 0); + pci_release_regions(dev); + + pci_disable_device(dev); + tifm_free_adapter(fm); +} + +static struct pci_device_id tifm_7xx1_pci_tbl [] = { + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */ + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { } +}; + +static struct pci_driver tifm_7xx1_driver = { + .name = DRIVER_NAME, + .id_table = tifm_7xx1_pci_tbl, + .probe = tifm_7xx1_probe, + .remove = tifm_7xx1_remove, + .suspend = tifm_7xx1_suspend, + .resume = tifm_7xx1_resume, +}; + +static int __init tifm_7xx1_init(void) +{ + return pci_register_driver(&tifm_7xx1_driver); +} + +static void __exit tifm_7xx1_exit(void) +{ + pci_unregister_driver(&tifm_7xx1_driver); +} + +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia host driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl); +MODULE_VERSION(DRIVER_VERSION); + +module_init(tifm_7xx1_init); +module_exit(tifm_7xx1_exit); diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c new file mode 100644 index 00000000000..98bcba521da --- /dev/null +++ b/drivers/misc/tifm_core.c @@ -0,0 +1,366 @@ +/* + * tifm_core.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov <oakad@yahoo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/tifm.h> +#include <linux/init.h> +#include <linux/idr.h> + +#define DRIVER_NAME "tifm_core" +#define DRIVER_VERSION "0.8" + +static struct workqueue_struct *workqueue; +static DEFINE_IDR(tifm_adapter_idr); +static DEFINE_SPINLOCK(tifm_adapter_lock); + +static const char *tifm_media_type_name(unsigned char type, unsigned char nt) +{ + const char *card_type_name[3][3] = { + { "SmartMedia/xD", "MemoryStick", "MMC/SD" }, + { "XD", "MS", "SD"}, + { "xd", "ms", "sd"} + }; + + if (nt > 2 || type < 1 || type > 3) + return NULL; + return card_type_name[nt][type - 1]; +} + +static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id) +{ + if (sock->type == id->type) + return 1; + return 0; +} + +static int tifm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver, + driver); + struct tifm_device_id *ids = fm_drv->id_table; + + if (ids) { + while (ids->type) { + if (tifm_dev_match(sock, ids)) + return 1; + ++ids; + } + } + return 0; +} + +static int tifm_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + + if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1))) + return -ENOMEM; + + return 0; +} + +static int tifm_device_probe(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + int rc = -ENODEV; + + get_device(dev); + if (dev->driver && drv->probe) { + rc = drv->probe(sock); + if (!rc) + return 0; + } + put_device(dev); + return rc; +} + +static void tifm_dummy_event(struct tifm_dev *sock) +{ + return; +} + +static int tifm_device_remove(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = 
container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->remove) { + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + drv->remove(sock); + sock->dev.driver = NULL; + } + + put_device(dev); + return 0; +} + +#ifdef CONFIG_PM + +static int tifm_device_suspend(struct device *dev, pm_message_t state) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->suspend) + return drv->suspend(sock, state); + return 0; +} + +static int tifm_device_resume(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->resume) + return drv->resume(sock); + return 0; +} + +#else + +#define tifm_device_suspend NULL +#define tifm_device_resume NULL + +#endif /* CONFIG_PM */ + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + return sprintf(buf, "%x", sock->type); +} + +static struct device_attribute tifm_dev_attrs[] = { + __ATTR(type, S_IRUGO, type_show, NULL), + __ATTR_NULL +}; + +static struct bus_type tifm_bus_type = { + .name = "tifm", + .dev_attrs = tifm_dev_attrs, + .match = tifm_bus_match, + .uevent = tifm_uevent, + .probe = tifm_device_probe, + .remove = tifm_device_remove, + .suspend = tifm_device_suspend, + .resume = tifm_device_resume +}; + +static void tifm_free(struct device *dev) +{ + struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev); + + kfree(fm); +} + +static struct class tifm_adapter_class = { + .name = "tifm_adapter", + .dev_release = tifm_free +}; + +struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, + struct device *dev) +{ + struct tifm_adapter *fm; + + fm = kzalloc(sizeof(struct tifm_adapter) + + 
sizeof(struct tifm_dev*) * num_sockets, GFP_KERNEL); + if (fm) { + fm->dev.class = &tifm_adapter_class; + fm->dev.parent = dev; + device_initialize(&fm->dev); + spin_lock_init(&fm->lock); + fm->num_sockets = num_sockets; + } + return fm; +} +EXPORT_SYMBOL(tifm_alloc_adapter); + +int tifm_add_adapter(struct tifm_adapter *fm) +{ + int rc; + + if (!idr_pre_get(&tifm_adapter_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&tifm_adapter_lock); + rc = idr_get_new(&tifm_adapter_idr, fm, &fm->id); + spin_unlock(&tifm_adapter_lock); + if (rc) + return rc; + + dev_set_name(&fm->dev, "tifm%u", fm->id); + rc = device_add(&fm->dev); + if (rc) { + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + } + + return rc; +} +EXPORT_SYMBOL(tifm_add_adapter); + +void tifm_remove_adapter(struct tifm_adapter *fm) +{ + unsigned int cnt; + + flush_workqueue(workqueue); + for (cnt = 0; cnt < fm->num_sockets; ++cnt) { + if (fm->sockets[cnt]) + device_unregister(&fm->sockets[cnt]->dev); + } + + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + device_del(&fm->dev); +} +EXPORT_SYMBOL(tifm_remove_adapter); + +void tifm_free_adapter(struct tifm_adapter *fm) +{ + put_device(&fm->dev); +} +EXPORT_SYMBOL(tifm_free_adapter); + +void tifm_free_device(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + kfree(sock); +} +EXPORT_SYMBOL(tifm_free_device); + +struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id, + unsigned char type) +{ + struct tifm_dev *sock = NULL; + + if (!tifm_media_type_name(type, 0)) + return sock; + + sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL); + if (sock) { + spin_lock_init(&sock->lock); + sock->type = type; + sock->socket_id = id; + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + + sock->dev.parent = fm->dev.parent; + sock->dev.bus = &tifm_bus_type; + 
sock->dev.dma_mask = fm->dev.parent->dma_mask; + sock->dev.release = tifm_free_device; + + dev_set_name(&sock->dev, "tifm_%s%u:%u", + tifm_media_type_name(type, 2), fm->id, id); + printk(KERN_INFO DRIVER_NAME + ": %s card detected in socket %u:%u\n", + tifm_media_type_name(type, 0), fm->id, id); + } + return sock; +} +EXPORT_SYMBOL(tifm_alloc_device); + +void tifm_eject(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + fm->eject(fm, sock); +} +EXPORT_SYMBOL(tifm_eject); + +int tifm_has_ms_pif(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + return fm->has_ms_pif(fm, sock); +} +EXPORT_SYMBOL(tifm_has_ms_pif); + +int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + return pci_map_sg(to_pci_dev(sock->dev.parent), sg, nents, direction); +} +EXPORT_SYMBOL(tifm_map_sg); + +void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + pci_unmap_sg(to_pci_dev(sock->dev.parent), sg, nents, direction); +} +EXPORT_SYMBOL(tifm_unmap_sg); + +void tifm_queue_work(struct work_struct *work) +{ + queue_work(workqueue, work); +} +EXPORT_SYMBOL(tifm_queue_work); + +int tifm_register_driver(struct tifm_driver *drv) +{ + drv->driver.bus = &tifm_bus_type; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL(tifm_register_driver); + +void tifm_unregister_driver(struct tifm_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(tifm_unregister_driver); + +static int __init tifm_init(void) +{ + int rc; + + workqueue = create_freezeable_workqueue("tifm"); + if (!workqueue) + return -ENOMEM; + + rc = bus_register(&tifm_bus_type); + + if (rc) + goto err_out_wq; + + rc = class_register(&tifm_adapter_class); + if (!rc) + return 0; + + bus_unregister(&tifm_bus_type); + +err_out_wq: + destroy_workqueue(workqueue); + + return rc; +} + +static void __exit tifm_exit(void) +{ + class_unregister(&tifm_adapter_class); + 
bus_unregister(&tifm_bus_type); + destroy_workqueue(workqueue); +} + +subsys_initcall(tifm_init); +module_exit(tifm_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia core driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); |