summaryrefslogtreecommitdiffstats
path: root/drivers/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci')
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/dma_remapping.h157
-rw-r--r--drivers/pci/dmar.c401
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c2
-rw-r--r--drivers/pci/intel-iommu.c164
-rw-r--r--drivers/pci/intel-iommu.h233
-rw-r--r--drivers/pci/intr_remapping.c471
-rw-r--r--drivers/pci/intr_remapping.h8
-rw-r--r--drivers/pci/msi.c20
-rw-r--r--drivers/pci/pci-acpi.c7
-rw-r--r--drivers/pci/pci.c45
-rw-r--r--drivers/pci/pcie/aspm.c32
-rw-r--r--drivers/pci/probe.c299
-rw-r--r--drivers/pci/proc.c18
-rw-r--r--drivers/pci/quirks.c20
15 files changed, 1418 insertions, 461 deletions
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8ced24..4b47f4ece5b 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
# Build Intel IOMMU support
obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
+obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+
#
# Some architectures use the generic PCI setup functions
#
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 00000000000..bff5c65f81d
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
+#ifndef _DMA_REMAPPING_H
+#define _DMA_REMAPPING_H
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K (12)
+#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 val;
+ u64 rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+ return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+ root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+ root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+ return (struct context_entry *)
+ (root_present(root)?phys_to_virt(
+ root->val & PAGE_MASK_4K):
+ NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi & 7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+ do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+ do { \
+ (c).lo &= (((u64)-1) << 4) | 3; \
+ (c).lo |= ((val) & 3) << 2; \
+ } while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+ do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+ do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+ u64 val;
+};
+#define dma_clear_pte(p) do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+ (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+ int id; /* domain id */
+ struct intel_iommu *iommu; /* back pointer to owning iommu */
+
+ struct list_head devices; /* all devices' list */
+ struct iova_domain iovad; /* iova's that belong to this domain */
+
+ struct dma_pte *pgd; /* virtual address */
+ spinlock_t mapping_lock; /* page table lock */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+ int flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ struct list_head global; /* link to global list */
+ u8 bus; /* PCI bus numer */
+ u8 devfn; /* PCI devfn number */
+ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+ struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+extern int dmar_disabled;
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+ return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f941f609dbf..bd2c01674f5 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
* Author: Shaohua Li <shaohua.li@intel.com>
* Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*
- * This file implements early detection/parsing of DMA Remapping Devices
+ * This file implements early detection/parsing of Remapping Devices
* reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
* tables.
+ *
+ * These routines are used by both DMA-remapping and Interrupt-remapping
*/
#include <linux/pci.h>
#include <linux/dmar.h>
+#include <linux/timer.h>
#include "iova.h"
#include "intel-iommu.h"
@@ -37,7 +40,6 @@
* these units are not supported by the architecture.
*/
LIST_HEAD(dmar_drhd_units);
-LIST_HEAD(dmar_rmrr_units);
static struct acpi_table_header * __initdata dmar_tbl;
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
list_add(&drhd->list, &dmar_drhd_units);
}
-static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
-{
- list_add(&rmrr->list, &dmar_rmrr_units);
-}
-
static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
struct pci_dev **dev, u16 segment)
{
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
struct acpi_dmar_hardware_unit *drhd;
struct dmar_drhd_unit *dmaru;
int ret = 0;
- static int include_all;
dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
if (!dmaru)
return -ENOMEM;
+ dmaru->hdr = header;
drhd = (struct acpi_dmar_hardware_unit *)header;
dmaru->reg_base_addr = drhd->address;
dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
+ ret = alloc_iommu(dmaru);
+ if (ret) {
+ kfree(dmaru);
+ return ret;
+ }
+ dmar_register_drhd_unit(dmaru);
+ return 0;
+}
+
+static int __init
+dmar_parse_dev(struct dmar_drhd_unit *dmaru)
+{
+ struct acpi_dmar_hardware_unit *drhd;
+ static int include_all;
+ int ret;
+
+ drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
+
if (!dmaru->include_all)
ret = dmar_parse_dev_scope((void *)(drhd + 1),
- ((void *)drhd) + header->length,
+ ((void *)drhd) + drhd->header.length,
&dmaru->devices_cnt, &dmaru->devices,
drhd->segment);
else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
include_all = 1;
}
- if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
+ if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+ list_del(&dmaru->list);
kfree(dmaru);
- else
- dmar_register_drhd_unit(dmaru);
+ }
return ret;
}
+#ifdef CONFIG_DMAR
+LIST_HEAD(dmar_rmrr_units);
+
+static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+{
+ list_add(&rmrr->list, &dmar_rmrr_units);
+}
+
+
static int __init
dmar_parse_one_rmrr(struct acpi_dmar_header *header)
{
struct acpi_dmar_reserved_memory *rmrr;
struct dmar_rmrr_unit *rmrru;
- int ret = 0;
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
return -ENOMEM;
+ rmrru->hdr = header;
rmrr = (struct acpi_dmar_reserved_memory *)header;
rmrru->base_address = rmrr->base_address;
rmrru->end_address = rmrr->end_address;
+
+ dmar_register_rmrr_unit(rmrru);
+ return 0;
+}
+
+static int __init
+rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
+{
+ struct acpi_dmar_reserved_memory *rmrr;
+ int ret;
+
+ rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
ret = dmar_parse_dev_scope((void *)(rmrr + 1),
- ((void *)rmrr) + header->length,
+ ((void *)rmrr) + rmrr->header.length,
&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
- if (ret || (rmrru->devices_cnt == 0))
+ if (ret || (rmrru->devices_cnt == 0)) {
+ list_del(&rmrru->list);
kfree(rmrru);
- else
- dmar_register_rmrr_unit(rmrru);
+ }
return ret;
}
+#endif
static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
}
}
+
/**
* parse_dmar_table - parses the DMA reporting table
*/
@@ -284,7 +322,9 @@ parse_dmar_table(void)
ret = dmar_parse_one_drhd(entry_header);
break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY:
+#ifdef CONFIG_DMAR
ret = dmar_parse_one_rmrr(entry_header);
+#endif
break;
default:
printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
return ret;
}
+int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
+ struct pci_dev *dev)
+{
+ int index;
+
+ while (dev) {
+ for (index = 0; index < cnt; index++)
+ if (dev == devices[index])
+ return 1;
-int __init dmar_table_init(void)
+ /* Check our parent */
+ dev = dev->bus->self;
+ }
+
+ return 0;
+}
+
+struct dmar_drhd_unit *
+dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
+ struct dmar_drhd_unit *drhd = NULL;
+ list_for_each_entry(drhd, &dmar_drhd_units, list) {
+ if (drhd->include_all || dmar_pci_device_match(drhd->devices,
+ drhd->devices_cnt, dev))
+ return drhd;
+ }
+
+ return NULL;
+}
+
+int __init dmar_dev_scope_init(void)
+{
+ struct dmar_drhd_unit *drhd;
+ int ret = -ENODEV;
+
+ for_each_drhd_unit(drhd) {
+ ret = dmar_parse_dev(drhd);
+ if (ret)
+ return ret;
+ }
+
+#ifdef CONFIG_DMAR
+ {
+ struct dmar_rmrr_unit *rmrr;
+ for_each_rmrr_units(rmrr) {
+ ret = rmrr_parse_dev(rmrr);
+ if (ret)
+ return ret;
+ }
+ }
+#endif
+
+ return ret;
+}
+
+
+int __init dmar_table_init(void)
+{
+ static int dmar_table_initialized;
int ret;
+ if (dmar_table_initialized)
+ return 0;
+
+ dmar_table_initialized = 1;
+
ret = parse_dmar_table();
if (ret) {
- printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+ if (ret != -ENODEV)
+ printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
return ret;
}
@@ -317,11 +419,14 @@ int __init dmar_table_init(void)
return -ENODEV;
}
- if (list_empty(&dmar_rmrr_units)) {
+#ifdef CONFIG_DMAR
+ if (list_empty(&dmar_rmrr_units))
printk(KERN_INFO PREFIX "No RMRR found\n");
- return -ENODEV;
- }
+#endif
+#ifdef CONFIG_INTR_REMAP
+ parse_ioapics_under_ir();
+#endif
return 0;
}
@@ -343,3 +448,255 @@ int __init early_dmar_detect(void)
return (ACPI_SUCCESS(status) ? 1 : 0);
}
+
+void __init detect_intel_iommu(void)
+{
+ int ret;
+
+ ret = early_dmar_detect();
+
+#ifdef CONFIG_DMAR
+ {
+ struct acpi_table_dmar *dmar;
+ /*
+ * for now we will disable dma-remapping when interrupt
+ * remapping is enabled.
+ * When support for queued invalidation for IOTLB invalidation
+ * is added, we will not need this any more.
+ */
+ dmar = (struct acpi_table_dmar *) dmar_tbl;
+ if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
+ printk(KERN_INFO
+ "Queued invalidation will be enabled to support "
+ "x2apic and Intr-remapping.\n");
+ printk(KERN_INFO
+ "Disabling IOMMU detection, because of missing "
+ "queued invalidation support for IOTLB "
+ "invalidation\n");
+ printk(KERN_INFO
+ "Use \"nox2apic\", if you want to use Intel "
+ " IOMMU for DMA-remapping and don't care about "
+ " x2apic support\n");
+
+ dmar_disabled = 1;
+ return;
+ }
+
+ if (ret && !no_iommu && !iommu_detected && !swiotlb &&
+ !dmar_disabled)
+ iommu_detected = 1;
+ }
+#endif
+}
+
+
+int alloc_iommu(struct dmar_drhd_unit *drhd)
+{
+ struct intel_iommu *iommu;
+ int map_size;
+ u32 ver;
+ static int iommu_allocated = 0;
+
+ iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+ if (!iommu)
+ return -ENOMEM;
+
+ iommu->seq_id = iommu_allocated++;
+
+ iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+ if (!iommu->reg) {
+ printk(KERN_ERR "IOMMU: can't map the region\n");
+ goto error;
+ }
+ iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+ iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+ /* the registers might be more than one page */
+ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+ cap_max_fault_reg_offset(iommu->cap));
+ map_size = PAGE_ALIGN_4K(map_size);
+ if (map_size > PAGE_SIZE_4K) {
+ iounmap(iommu->reg);
+ iommu->reg = ioremap(drhd->reg_base_addr, map_size);
+ if (!iommu->reg) {
+ printk(KERN_ERR "IOMMU: can't map the region\n");
+ goto error;
+ }
+ }
+
+ ver = readl(iommu->reg + DMAR_VER_REG);
+ pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+ drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+ iommu->cap, iommu->ecap);
+
+ spin_lock_init(&iommu->register_lock);
+
+ drhd->iommu = iommu;
+ return 0;
+error:
+ kfree(iommu);
+ return -1;
+}
+
+void free_iommu(struct intel_iommu *iommu)
+{
+ if (!iommu)
+ return;
+
+#ifdef CONFIG_DMAR
+ free_dmar_iommu(iommu);
+#endif
+
+ if (iommu->reg)
+ iounmap(iommu->reg);
+ kfree(iommu);
+}
+
+/*
+ * Reclaim all the submitted descriptors which have completed its work.
+ */
+static inline void reclaim_free_desc(struct q_inval *qi)
+{
+ while (qi->desc_status[qi->free_tail] == QI_DONE) {
+ qi->desc_status[qi->free_tail] = QI_FREE;
+ qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
+ qi->free_cnt++;
+ }
+}
+
+/*
+ * Submit the queued invalidation descriptor to the remapping
+ * hardware unit and wait for its completion.
+ */
+void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+{
+ struct q_inval *qi = iommu->qi;
+ struct qi_desc *hw, wait_desc;
+ int wait_index, index;
+ unsigned long flags;
+
+ if (!qi)
+ return;
+
+ hw = qi->desc;
+
+ spin_lock(&qi->q_lock);
+ while (qi->free_cnt < 3) {
+ spin_unlock(&qi->q_lock);
+ cpu_relax();
+ spin_lock(&qi->q_lock);
+ }
+
+ index = qi->free_head;
+ wait_index = (index + 1) % QI_LENGTH;
+
+ qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+
+ hw[index] = *desc;
+
+ wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+ wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+
+ hw[wait_index] = wait_desc;
+
+ __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
+ __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
+
+ qi->free_head = (qi->free_head + 2) % QI_LENGTH;
+ qi->free_cnt -= 2;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ /*
+ * update the HW tail register indicating the presence of
+ * new descriptors.
+ */
+ writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ while (qi->desc_status[wait_index] != QI_DONE) {
+ spin_unlock(&qi->q_lock);
+ cpu_relax();
+ spin_lock(&qi->q_lock);
+ }
+
+ qi->desc_status[index] = QI_DONE;
+
+ reclaim_free_desc(qi);
+ spin_unlock(&qi->q_lock);
+}
+
+/*
+ * Flush the global interrupt entry cache.
+ */
+void qi_global_iec(struct intel_iommu *iommu)
+{
+ struct qi_desc desc;
+
+ desc.low = QI_IEC_TYPE;
+ desc.high = 0;
+
+ qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Enable Queued Invalidation interface. This is a must to support
+ * interrupt-remapping. Also used by DMA-remapping, which replaces
+ * register based IOTLB invalidation.
+ */
+int dmar_enable_qi(struct intel_iommu *iommu)
+{
+ u32 cmd, sts;
+ unsigned long flags;
+ struct q_inval *qi;
+
+ if (!ecap_qis(iommu->ecap))
+ return -ENOENT;
+
+ /*
+ * queued invalidation is already setup and enabled.
+ */
+ if (iommu->qi)
+ return 0;
+
+ iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+ if (!iommu->qi)
+ return -ENOMEM;
+
+ qi = iommu->qi;
+
+ qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+ if (!qi->desc) {
+ kfree(qi);
+ iommu->qi = 0;
+ return -ENOMEM;
+ }
+
+ qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+ if (!qi->desc_status) {
+ free_page((unsigned long) qi->desc);
+ kfree(qi);
+ iommu->qi = 0;
+ return -ENOMEM;
+ }
+
+ qi->free_head = qi->free_tail = 0;
+ qi->free_cnt = QI_LENGTH;
+
+ spin_lock_init(&qi->q_lock);
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ /* write zero to the tail reg */
+ writel(0, iommu->reg + DMAR_IQT_REG);
+
+ dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+ cmd = iommu->gcmd | DMA_GCMD_QIE;
+ iommu->gcmd |= DMA_GCMD_QIE;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure hardware complete it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ return 0;
+}
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 1323a43285d..ad27e9e225a 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -1103,7 +1103,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
dbg(" Power Indicator : %3s\n", PWR_LED(ctrl) ? "yes" : "no");
dbg(" Hot-Plug Surprise : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no");
dbg(" EMI Present : %3s\n", EMI(ctrl) ? "yes" : "no");
- dbg(" Comamnd Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
+ dbg(" Command Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
pciehp_readw(ctrl, SLOTSTATUS, &reg16);
dbg("Slot Status : 0x%04x\n", reg16);
pciehp_readw(ctrl, SLOTCTRL, &reg16);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60ac849..389fdd6f4a9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
-
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
-static struct intel_iommu *g_iommus;
-
#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
int next;
@@ -80,7 +76,7 @@ static long list_size;
static void domain_remove_dev_info(struct dmar_domain *domain);
-static int dmar_disabled;
+int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
-static inline void __iommu_flush_cache(
- struct intel_iommu *iommu, void *addr, int size)
-{
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(addr, size);
-}
-
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
return 0;
}
-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
- cycles_t start_time = get_cycles();\
- while (1) {\
- sts = op (iommu->reg + offset);\
- if (cond)\
- break;\
- if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
- panic("DMAR hardware is malfunctioning\n");\
- cpu_relax();\
- }\
-}
-
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
return -ENOMEM;
}
+ spin_lock_init(&iommu->lock);
+
/*
* if Caching mode is set, then invalid translations are tagged
* with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
set_bit(0, iommu->domain_ids);
return 0;
}
-static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
- struct dmar_drhd_unit *drhd)
-{
- int ret;
- int map_size;
- u32 ver;
-
- iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
- if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
- goto error;
- }
- iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
- iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
- /* the registers might be more than one page */
- map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
- cap_max_fault_reg_offset(iommu->cap));
- map_size = PAGE_ALIGN_4K(map_size);
- if (map_size > PAGE_SIZE_4K) {
- iounmap(iommu->reg);
- iommu->reg = ioremap(drhd->reg_base_addr, map_size);
- if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
- goto error;
- }
- }
-
- ver = readl(iommu->reg + DMAR_VER_REG);
- pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
- drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
- iommu->cap, iommu->ecap);
- ret = iommu_init_domains(iommu);
- if (ret)
- goto error_unmap;
- spin_lock_init(&iommu->lock);
- spin_lock_init(&iommu->register_lock);
- drhd->iommu = iommu;
- return iommu;
-error_unmap:
- iounmap(iommu->reg);
-error:
- kfree(iommu);
- return NULL;
-}
static void domain_exit(struct dmar_domain *domain);
-static void free_iommu(struct intel_iommu *iommu)
+
+void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
int i;
- if (!iommu)
- return;
-
i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
/* free context mapping */
free_context_table(iommu);
-
- if (iommu->reg)
- iounmap(iommu->reg);
- kfree(iommu);
}
static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
return NULL;
}
-static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
- struct pci_dev *dev)
-{
- int index;
-
- while (dev) {
- for (index = 0; index < cnt; index++)
- if (dev == devices[index])
- return 1;
-
- /* Check our parent */
- dev = dev->bus->self;
- }
-
- return 0;
-}
-
-static struct dmar_drhd_unit *
-dmar_find_matched_drhd_unit(struct pci_dev *dev)
-{
- struct dmar_drhd_unit *drhd = NULL;
-
- list_for_each_entry(drhd, &dmar_drhd_units, list) {
- if (drhd->include_all || dmar_pci_device_match(drhd->devices,
- drhd->devices_cnt, dev))
- return drhd;
- }
-
- return NULL;
-}
-
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
* endfor
*/
for_each_drhd_unit(drhd) {
- if (drhd->ignored)
- continue;
g_num_of_iommus++;
/*
* lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
*/
}
- g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
- if (!g_iommus) {
- ret = -ENOMEM;
- goto error;
- }
-
deferred_flush = kzalloc(g_num_of_iommus *
sizeof(struct deferred_flush_tables), GFP_KERNEL);
if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
goto error;
}
- i = 0;
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
- iommu = alloc_iommu(&g_iommus[i], drhd);
- i++;
- if (!iommu) {
- ret = -ENOMEM;
+
+ iommu = drhd->iommu;
+
+ ret = iommu_init_domains(iommu);
+ if (ret)
goto error;
- }
/*
* TBD:
@@ -1845,7 +1732,6 @@ error:
iommu = drhd->iommu;
free_iommu(iommu);
}
- kfree(g_iommus);
return ret;
}
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
/* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) {
if (deferred_flush[i].next) {
- iommu_flush_iotlb_global(&g_iommus[i], 0);
+ struct intel_iommu *iommu =
+ deferred_flush[i].domain[0]->iommu;
+
+ iommu_flush_iotlb_global(iommu, 0);
for (j = 0; j < deferred_flush[i].next; j++) {
__free_iova(&deferred_flush[i].domain[j]->iovad,
deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
if (list_size == HIGH_WATER_MARK)
flush_unmaps();
- iommu_id = dom->iommu - g_iommus;
+ iommu_id = dom->iommu->seq_id;
+
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,15 +2238,6 @@ static void __init iommu_exit_mempool(void)
}
-void __init detect_intel_iommu(void)
-{
- if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
- return;
- if (early_dmar_detect()) {
- iommu_detected = 1;
- }
-}
-
static void __init init_no_remapping_devices(void)
{
struct dmar_drhd_unit *drhd;
@@ -2403,12 +2284,19 @@ int __init intel_iommu_init(void)
{
int ret = 0;
- if (no_iommu || swiotlb || dmar_disabled)
- return -ENODEV;
-
if (dmar_table_init())
return -ENODEV;
+ if (dmar_dev_scope_init())
+ return -ENODEV;
+
+ /*
+ * Check the need for DMA-remapping initialization now.
+ * Above initialization will also be used by Interrupt-remapping.
+ */
+ if (no_iommu || swiotlb || dmar_disabled)
+ return -ENODEV;
+
iommu_init_mempool();
dmar_init_reserved_ranges();
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad96122..2142c01e014 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
#include <linux/sysdev.h>
#include "iova.h"
#include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K (12)
-#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+#include <asm/cacheflush.h>
+#include "dma_remapping.h"
/*
* Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
+#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
+#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
+#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
+#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
+#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
#define OFFSET_STRIDE (9)
/*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define ecap_max_iotlb_offset(e) \
(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
#define ecap_coherent(e) ((e) & 0x1)
+#define ecap_qis(e) ((e) & 0x2)
+#define ecap_eim_support(e) ((e >> 4) & 0x1)
+#define ecap_ir_support(e) ((e >> 3) & 0x1)
+#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
/* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
#define DMA_TLB_MAX_SIZE (0x3f)
+/* INVALID_DESC */
+#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
+#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
+#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
+#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
+#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr) (addr)
+#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
+
/* PMEN_REG */
#define DMA_PMEN_EPM (((u32)1)<<31)
#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define DMA_GCMD_SFL (((u32)1) << 29)
#define DMA_GCMD_EAFL (((u32)1) << 28)
#define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)
/* GSTS_REG */
#define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define DMA_GSTS_FLS (((u32)1) << 29)
#define DMA_GSTS_AFLS (((u32)1) << 28)
#define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)
/* CCMD_REG */
#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define dma_frcd_source_id(c) (c & 0xffff)
#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
- u64 val;
- u64 rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
- return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
- root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
- root->val |= value & PAGE_MASK_4K;
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+{\
+ cycles_t start_time = get_cycles();\
+ while (1) {\
+ sts = op (iommu->reg + offset);\
+ if (cond)\
+ break;\
+ if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+ panic("DMAR hardware is malfunctioning\n");\
+ cpu_relax();\
+ }\
}
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
- return (struct context_entry *)
- (root_present(root)?phys_to_virt(
- root->val & PAGE_MASK_4K):
- NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
- u64 lo;
- u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi & 7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
- do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
- do { \
- (c).lo &= (((u64)-1) << 4) | 3; \
- (c).lo |= ((val) & 3) << 2; \
- } while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
- do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
- do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+#define QI_LENGTH 256 /* queue length */
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
- u64 val;
+enum {
+ QI_FREE,
+ QI_IN_USE,
+ QI_DONE
};
-#define dma_clear_pte(p) do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
- do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
- (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
+#define QI_CC_TYPE 0x1
+#define QI_IOTLB_TYPE 0x2
+#define QI_DIOTLB_TYPE 0x3
+#define QI_IEC_TYPE 0x4
+#define QI_IWD_TYPE 0x5
-struct intel_iommu;
+#define QI_IEC_SELECTIVE (((u64)1) << 4)
+#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
-struct dmar_domain {
- int id; /* domain id */
- struct intel_iommu *iommu; /* back pointer to owning iommu */
+#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
- struct list_head devices; /* all devices' list */
- struct iova_domain iovad; /* iova's that belong to this domain */
+struct qi_desc {
+ u64 low, high;
+};
- struct dma_pte *pgd; /* virtual address */
- spinlock_t mapping_lock; /* page table lock */
- int gaw; /* max guest address width */
+struct q_inval {
+ spinlock_t q_lock;
+ struct qi_desc *desc; /* invalidation queue */
+ int *desc_status; /* desc status */
+ int free_head; /* first free entry */
+ int free_tail; /* last free entry */
+ int free_cnt;
+};
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
+#ifdef CONFIG_INTR_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER 8
+#define INTR_REMAP_TABLE_REG_SIZE 0xf
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
- int flags;
-};
+#define INTR_REMAP_TABLE_ENTRIES 65536
-/* PCI domain-device relationship */
-struct device_domain_info {
- struct list_head link; /* link to domain siblings */
- struct list_head global; /* link to global list */
- u8 bus; /* PCI bus numer */
- u8 devfn; /* PCI devfn number */
- struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
- struct dmar_domain *domain; /* pointer to domain */
+struct ir_table {
+ struct irte *base;
};
-
-extern int init_dmars(void);
+#endif
struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u64 cap;
u64 ecap;
- unsigned long *domain_ids; /* bitmap of domains */
- struct dmar_domain **domains; /* ptr to domains */
int seg;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
- spinlock_t lock; /* protect context, domain ids */
spinlock_t register_lock; /* protect register handling */
+ int seq_id; /* sequence id of the iommu */
+
+#ifdef CONFIG_DMAR
+ unsigned long *domain_ids; /* bitmap of domains */
+ struct dmar_domain **domains; /* ptr to domains */
+ spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
unsigned int irq;
unsigned char name[7]; /* Device Name */
struct msi_msg saved_msg;
struct sys_device sysdev;
+#endif
+ struct q_inval *qi; /* Queued invalidation info */
+#ifdef CONFIG_INTR_REMAP
+ struct ir_table *ir_table; /* Interrupt remapping info */
+#endif
};
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
+static inline void __iommu_flush_cache(
+ struct intel_iommu *iommu, void *addr, int size)
{
- return;
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
}
-#endif /* !CONFIG_DMAR_GFX_WA */
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int alloc_iommu(struct dmar_drhd_unit *drhd);
+extern void free_iommu(struct intel_iommu *iommu);
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
#endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 00000000000..bb642cc5e18
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
+#include <linux/dmar.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <asm/io_apic.h>
+#include "intel-iommu.h"
+#include "intr_remapping.h"
+
+static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
+static int ir_ioapic_num;
+int intr_remapping_enabled;
+
+static struct {
+ struct intel_iommu *iommu;
+ u16 irte_index;
+ u16 sub_handle;
+ u8 irte_mask;
+} irq_2_iommu[NR_IRQS];
+
+static DEFINE_SPINLOCK(irq_2_ir_lock);
+
+int irq_remapped(int irq)
+{
+ if (irq > NR_IRQS)
+ return 0;
+
+ if (!irq_2_iommu[irq].iommu)
+ return 0;
+
+ return 1;
+}
+
+int get_irte(int irq, struct irte *entry)
+{
+ int index;
+
+ if (!entry || irq > NR_IRQS)
+ return -1;
+
+ spin_lock(&irq_2_ir_lock);
+ if (!irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+ *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+
+ spin_unlock(&irq_2_ir_lock);
+ return 0;
+}
+
+int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+{
+ struct ir_table *table = iommu->ir_table;
+ u16 index, start_index;
+ unsigned int mask = 0;
+ int i;
+
+ if (!count)
+ return -1;
+
+ /*
+ * start the IRTE search from index 0.
+ */
+ index = start_index = 0;
+
+ if (count > 1) {
+ count = __roundup_pow_of_two(count);
+ mask = ilog2(count);
+ }
+
+ if (mask > ecap_max_handle_mask(iommu->ecap)) {
+ printk(KERN_ERR
+ "Requested mask %x exceeds the max invalidation handle"
+ " mask value %Lx\n", mask,
+ ecap_max_handle_mask(iommu->ecap));
+ return -1;
+ }
+
+ spin_lock(&irq_2_ir_lock);
+ do {
+ for (i = index; i < index + count; i++)
+ if (table->base[i].present)
+ break;
+ /* empty index found */
+ if (i == index + count)
+ break;
+
+ index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
+
+ if (index == start_index) {
+ spin_unlock(&irq_2_ir_lock);
+ printk(KERN_ERR "can't allocate an IRTE\n");
+ return -1;
+ }
+ } while (1);
+
+ for (i = index; i < index + count; i++)
+ table->base[i].present = 1;
+
+ irq_2_iommu[irq].iommu = iommu;
+ irq_2_iommu[irq].irte_index = index;
+ irq_2_iommu[irq].sub_handle = 0;
+ irq_2_iommu[irq].irte_mask = mask;
+
+ spin_unlock(&irq_2_ir_lock);
+
+ return index;
+}
+
+static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
+{
+ struct qi_desc desc;
+
+ desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
+ | QI_IEC_SELECTIVE;
+ desc.high = 0;
+
+ qi_submit_sync(&desc, iommu);
+}
+
+int map_irq_to_irte_handle(int irq, u16 *sub_handle)
+{
+ int index;
+
+ spin_lock(&irq_2_ir_lock);
+ if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ *sub_handle = irq_2_iommu[irq].sub_handle;
+ index = irq_2_iommu[irq].irte_index;
+ spin_unlock(&irq_2_ir_lock);
+ return index;
+}
+
+int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
+{
+ spin_lock(&irq_2_ir_lock);
+ if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ irq_2_iommu[irq].iommu = iommu;
+ irq_2_iommu[irq].irte_index = index;
+ irq_2_iommu[irq].sub_handle = subhandle;
+ irq_2_iommu[irq].irte_mask = 0;
+
+ spin_unlock(&irq_2_ir_lock);
+
+ return 0;
+}
+
+int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
+{
+ spin_lock(&irq_2_ir_lock);
+ if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ irq_2_iommu[irq].iommu = NULL;
+ irq_2_iommu[irq].irte_index = 0;
+ irq_2_iommu[irq].sub_handle = 0;
+ irq_2_iommu[irq].irte_mask = 0;
+
+ spin_unlock(&irq_2_ir_lock);
+
+ return 0;
+}
+
+int modify_irte(int irq, struct irte *irte_modified)
+{
+ int index;
+ struct irte *irte;
+ struct intel_iommu *iommu;
+
+ spin_lock(&irq_2_ir_lock);
+ if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ iommu = irq_2_iommu[irq].iommu;
+
+ index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+ irte = &iommu->ir_table->base[index];
+
+ set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
+ __iommu_flush_cache(iommu, irte, sizeof(*irte));
+
+ qi_flush_iec(iommu, index, 0);
+
+ spin_unlock(&irq_2_ir_lock);
+ return 0;
+}
+
+int flush_irte(int irq)
+{
+ int index;
+ struct intel_iommu *iommu;
+
+ spin_lock(&irq_2_ir_lock);
+ if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ iommu = irq_2_iommu[irq].iommu;
+
+ index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+
+ qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+ spin_unlock(&irq_2_ir_lock);
+
+ return 0;
+}
+
+struct intel_iommu *map_ioapic_to_ir(int apic)
+{
+ int i;
+
+ for (i = 0; i < MAX_IO_APICS; i++)
+ if (ir_ioapic[i].id == apic)
+ return ir_ioapic[i].iommu;
+ return NULL;
+}
+
+struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
+{
+ struct dmar_drhd_unit *drhd;
+
+ drhd = dmar_find_matched_drhd_unit(dev);
+ if (!drhd)
+ return NULL;
+
+ return drhd->iommu;
+}
+
+int free_irte(int irq)
+{
+ int index, i;
+ struct irte *irte;
+ struct intel_iommu *iommu;
+
+ spin_lock(&irq_2_ir_lock);
+ if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ return -1;
+ }
+
+ iommu = irq_2_iommu[irq].iommu;
+
+ index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+ irte = &iommu->ir_table->base[index];
+
+ if (!irq_2_iommu[irq].sub_handle) {
+ for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+ set_64bit((unsigned long *)irte, 0);
+ qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+ }
+
+ irq_2_iommu[irq].iommu = NULL;
+ irq_2_iommu[irq].irte_index = 0;
+ irq_2_iommu[irq].sub_handle = 0;
+ irq_2_iommu[irq].irte_mask = 0;
+
+ spin_unlock(&irq_2_ir_lock);
+
+ return 0;
+}
+
+static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
+{
+ u64 addr;
+ u32 cmd, sts;
+ unsigned long flags;
+
+ addr = virt_to_phys((void *)iommu->ir_table->base);
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+
+ dmar_writeq(iommu->reg + DMAR_IRTA_REG,
+ (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
+
+ /* Set interrupt-remapping table pointer */
+ cmd = iommu->gcmd | DMA_GCMD_SIRTP;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (sts & DMA_GSTS_IRTPS), sts);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ /*
+ * global invalidation of interrupt entry cache before enabling
+ * interrupt-remapping.
+ */
+ qi_global_iec(iommu);
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+
+ /* Enable interrupt-remapping */
+ cmd = iommu->gcmd | DMA_GCMD_IRE;
+ iommu->gcmd |= DMA_GCMD_IRE;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (sts & DMA_GSTS_IRES), sts);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+
+static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
+{
+ struct ir_table *ir_table;
+ struct page *pages;
+
+ ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
+ GFP_KERNEL);
+
+ if (!iommu->ir_table)
+ return -ENOMEM;
+
+ pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+
+ if (!pages) {
+ printk(KERN_ERR "failed to allocate pages of order %d\n",
+ INTR_REMAP_PAGE_ORDER);
+ kfree(iommu->ir_table);
+ return -ENOMEM;
+ }
+
+ ir_table->base = page_address(pages);
+
+ iommu_set_intr_remapping(iommu, mode);
+ return 0;
+}
+
+int __init enable_intr_remapping(int eim)
+{
+ struct dmar_drhd_unit *drhd;
+ int setup = 0;
+
+ /*
+ * check for the Interrupt-remapping support
+ */
+ for_each_drhd_unit(drhd) {
+ struct intel_iommu *iommu = drhd->iommu;
+
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ if (eim && !ecap_eim_support(iommu->ecap)) {
+ printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
+ " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
+ return -1;
+ }
+ }
+
+ /*
+ * Enable queued invalidation for all the DRHD's.
+ */
+ for_each_drhd_unit(drhd) {
+ int ret;
+ struct intel_iommu *iommu = drhd->iommu;
+ ret = dmar_enable_qi(iommu);
+
+ if (ret) {
+ printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
+ " invalidation, ecap %Lx, ret %d\n",
+ drhd->reg_base_addr, iommu->ecap, ret);
+ return -1;
+ }
+ }
+
+ /*
+ * Setup Interrupt-remapping for all the DRHD's now.
+ */
+ for_each_drhd_unit(drhd) {
+ struct intel_iommu *iommu = drhd->iommu;
+
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ if (setup_intr_remapping(iommu, eim))
+ goto error;
+
+ setup = 1;
+ }
+
+ if (!setup)
+ goto error;
+
+ intr_remapping_enabled = 1;
+
+ return 0;
+
+error:
+ /*
+ * handle error condition gracefully here!
+ */
+ return -1;
+}
+
+static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
+ struct intel_iommu *iommu)
+{
+ struct acpi_dmar_hardware_unit *drhd;
+ struct acpi_dmar_device_scope *scope;
+ void *start, *end;
+
+ drhd = (struct acpi_dmar_hardware_unit *)header;
+
+ start = (void *)(drhd + 1);
+ end = ((void *)drhd) + header->length;
+
+ while (start < end) {
+ scope = start;
+ if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
+ if (ir_ioapic_num == MAX_IO_APICS) {
+ printk(KERN_WARNING "Exceeded Max IO APICS\n");
+ return -1;
+ }
+
+ printk(KERN_INFO "IOAPIC id %d under DRHD base"
+ " 0x%Lx\n", scope->enumeration_id,
+ drhd->address);
+
+ ir_ioapic[ir_ioapic_num].iommu = iommu;
+ ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
+ ir_ioapic_num++;
+ }
+ start += scope->length;
+ }
+
+ return 0;
+}
+
+/*
+ * Finds the assocaition between IOAPIC's and its Interrupt-remapping
+ * hardware unit.
+ */
+int __init parse_ioapics_under_ir(void)
+{
+ struct dmar_drhd_unit *drhd;
+ int ir_supported = 0;
+
+ for_each_drhd_unit(drhd) {
+ struct intel_iommu *iommu = drhd->iommu;
+
+ if (ecap_ir_support(iommu->ecap)) {
+ if (ir_parse_ioapic_scope(drhd->hdr, iommu))
+ return -1;
+
+ ir_supported = 1;
+ }
+ }
+
+ if (ir_supported && ir_ioapic_num != nr_ioapics) {
+ printk(KERN_WARNING
+ "Not all IO-APIC's listed under remapping hardware\n");
+ return -1;
+ }
+
+ return ir_supported;
+}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 00000000000..05f2635bbe4
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
+#include "intel-iommu.h"
+
+struct ioapic_scope {
+ struct intel_iommu *iommu;
+ unsigned int id;
+};
+
+#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 15af618d36e..4a10b5624f7 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -126,7 +126,16 @@ static void msix_flush_writes(unsigned int irq)
}
}
-static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
+/*
+ * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to
+ * mask all MSI interrupts by clearing the MSI enable bit does not work
+ * reliably as devices without an INTx disable bit will then generate a
+ * level IRQ which will never be cleared.
+ *
+ * Returns 1 if it succeeded in masking the interrupt and 0 if the device
+ * doesn't support MSI masking.
+ */
+static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
{
struct msi_desc *entry;
@@ -144,8 +153,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
mask_bits |= flag & mask;
pci_write_config_dword(entry->dev, pos, mask_bits);
} else {
- __msi_set_enable(entry->dev, entry->msi_attrib.pos,
- !flag);
+ return 0;
}
break;
case PCI_CAP_ID_MSIX:
@@ -161,6 +169,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
break;
}
entry->msi_attrib.masked = !!flag;
+ return 1;
}
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
@@ -299,9 +308,8 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
entry->msi_attrib.masked);
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
- control &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
- if (entry->msi_attrib.maskbit || !entry->msi_attrib.masked)
- control |= PCI_MSI_FLAGS_ENABLE;
+ control &= ~PCI_MSI_FLAGS_QSIZE;
+ control |= PCI_MSI_FLAGS_ENABLE;
pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7764768b6a0..89a2f0fa10f 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/module.h>
+#include <linux/pci-aspm.h>
#include <acpi/acpi.h>
#include <acpi/acnamesp.h>
#include <acpi/acresrc.h>
@@ -372,6 +373,12 @@ static int __init acpi_pci_init(void)
printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n");
pci_no_msi();
}
+
+ if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) {
+ printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
+ pcie_no_aspm();
+ }
+
ret = register_acpi_bus_type(&acpi_pci_bus);
if (ret)
return 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d00f0e0d845..c9884bba22d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -572,6 +572,10 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
if (!ret)
pci_update_current_state(dev);
}
+ /* This device is quirked not to be put into D3, so
+ don't put it in D3 */
+ if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
+ return 0;
error = pci_raw_set_power_state(dev, state);
@@ -1040,7 +1044,7 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
* @dev: PCI device to handle.
* @state: PCI state from which device will issue PME#.
*/
-static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
+bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
{
if (!dev->pm_cap)
return false;
@@ -1056,7 +1060,7 @@ static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
* The caller must verify that the device is capable of generating PME# before
* calling this function with @enable equal to 'true'.
*/
-static void pci_pme_active(struct pci_dev *dev, bool enable)
+void pci_pme_active(struct pci_dev *dev, bool enable)
{
u16 pmcsr;
@@ -1123,17 +1127,16 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
}
/**
- * pci_prepare_to_sleep - prepare PCI device for system-wide transition into a sleep state
- * @dev: Device to handle.
+ * pci_target_state - find an appropriate low power state for a given PCI dev
+ * @dev: PCI device
*
- * Choose the power state appropriate for the device depending on whether
- * it can wake up the system and/or is power manageable by the platform
- * (PCI_D3hot is the default) and put the device into that state.
+ * Use underlying platform code to find a supported low power state for @dev.
+ * If the platform can't manage @dev, return the deepest state from which it
+ * can generate wake events, based on any available PME info.
*/
-int pci_prepare_to_sleep(struct pci_dev *dev)
+pci_power_t pci_target_state(struct pci_dev *dev)
{
pci_power_t target_state = PCI_D3hot;
- int error;
if (platform_pci_power_manageable(dev)) {
/*
@@ -1160,7 +1163,7 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
* to generate PME#.
*/
if (!dev->pm_cap)
- return -EIO;
+ return PCI_POWER_ERROR;
if (dev->pme_support) {
while (target_state
@@ -1169,6 +1172,25 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
}
}
+ return target_state;
+}
+
+/**
+ * pci_prepare_to_sleep - prepare PCI device for system-wide transition into a sleep state
+ * @dev: Device to handle.
+ *
+ * Choose the power state appropriate for the device depending on whether
+ * it can wake up the system and/or is power manageable by the platform
+ * (PCI_D3hot is the default) and put the device into that state.
+ */
+int pci_prepare_to_sleep(struct pci_dev *dev)
+{
+ pci_power_t target_state = pci_target_state(dev);
+ int error;
+
+ if (target_state == PCI_POWER_ERROR)
+ return -EIO;
+
pci_enable_wake(dev, target_state, true);
error = pci_set_power_state(dev, target_state);
@@ -1918,7 +1940,10 @@ EXPORT_SYMBOL(pci_select_bars);
EXPORT_SYMBOL(pci_set_power_state);
EXPORT_SYMBOL(pci_save_state);
EXPORT_SYMBOL(pci_restore_state);
+EXPORT_SYMBOL(pci_pme_capable);
+EXPORT_SYMBOL(pci_pme_active);
EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_target_state);
EXPORT_SYMBOL(pci_prepare_to_sleep);
EXPORT_SYMBOL(pci_back_from_sleep);
EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f82495583e6..9a7c9e1408a 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -55,7 +55,7 @@ struct pcie_link_state {
struct endpoint_state endpoints[8];
};
-static int aspm_disabled;
+static int aspm_disabled, aspm_force;
static DEFINE_MUTEX(aspm_lock);
static LIST_HEAD(link_list);
@@ -510,6 +510,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
{
struct pci_dev *child_dev;
int child_pos;
+ u32 reg32;
/*
* Some functions in a slot might not all be PCIE functions, very
@@ -519,6 +520,19 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
if (!child_pos)
return -EINVAL;
+
+ /*
+ * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
+ * RBER bit to determine if a function is 1.1 version device
+ */
+ pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
+ &reg32);
+ if (!(reg32 & PCI_EXP_DEVCAP_RBER && !aspm_force)) {
+ printk("Pre-1.1 PCIe device detected, "
+ "disable ASPM for %s. It can be enabled forcedly"
+ " with 'pcie_aspm=force'\n", pci_name(pdev));
+ return -EINVAL;
+ }
}
return 0;
}
@@ -802,11 +816,23 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
static int __init pcie_aspm_disable(char *str)
{
- aspm_disabled = 1;
+ if (!strcmp(str, "off")) {
+ aspm_disabled = 1;
+ printk(KERN_INFO "PCIe ASPM is disabled\n");
+ } else if (!strcmp(str, "force")) {
+ aspm_force = 1;
+ printk(KERN_INFO "PCIe ASPM is forcedly enabled\n");
+ }
return 1;
}
-__setup("pcie_noaspm", pcie_aspm_disable);
+__setup("pcie_aspm=", pcie_aspm_disable);
+
+void pcie_no_aspm(void)
+{
+ if (!aspm_force)
+ aspm_disabled = 1;
+}
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b1724cf31b6..a04498d390c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -52,27 +52,49 @@ EXPORT_SYMBOL(no_pci_devices);
* Some platforms allow access to legacy I/O port and ISA memory space on
* a per-bus basis. This routine creates the files and ties them into
* their associated read, write and mmap files from pci-sysfs.c
+ *
+ * On error unwind, but don't propogate the error to the caller
+ * as it is ok to set up the PCI bus without these files.
*/
static void pci_create_legacy_files(struct pci_bus *b)
{
+ int error;
+
b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
GFP_ATOMIC);
- if (b->legacy_io) {
- b->legacy_io->attr.name = "legacy_io";
- b->legacy_io->size = 0xffff;
- b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
- b->legacy_io->read = pci_read_legacy_io;
- b->legacy_io->write = pci_write_legacy_io;
- device_create_bin_file(&b->dev, b->legacy_io);
-
- /* Allocated above after the legacy_io struct */
- b->legacy_mem = b->legacy_io + 1;
- b->legacy_mem->attr.name = "legacy_mem";
- b->legacy_mem->size = 1024*1024;
- b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
- b->legacy_mem->mmap = pci_mmap_legacy_mem;
- device_create_bin_file(&b->dev, b->legacy_mem);
- }
+ if (!b->legacy_io)
+ goto kzalloc_err;
+
+ b->legacy_io->attr.name = "legacy_io";
+ b->legacy_io->size = 0xffff;
+ b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+ b->legacy_io->read = pci_read_legacy_io;
+ b->legacy_io->write = pci_write_legacy_io;
+ error = device_create_bin_file(&b->dev, b->legacy_io);
+ if (error)
+ goto legacy_io_err;
+
+ /* Allocated above after the legacy_io struct */
+ b->legacy_mem = b->legacy_io + 1;
+ b->legacy_mem->attr.name = "legacy_mem";
+ b->legacy_mem->size = 1024*1024;
+ b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+ b->legacy_mem->mmap = pci_mmap_legacy_mem;
+ error = device_create_bin_file(&b->dev, b->legacy_mem);
+ if (error)
+ goto legacy_mem_err;
+
+ return;
+
+legacy_mem_err:
+ device_remove_bin_file(&b->dev, b->legacy_io);
+legacy_io_err:
+ kfree(b->legacy_io);
+ b->legacy_io = NULL;
+kzalloc_err:
+ printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
+ "and ISA memory resources to sysfs\n");
+ return;
}
void pci_remove_legacy_files(struct pci_bus *b)
@@ -163,12 +185,9 @@ static inline unsigned int pci_calc_resource_flags(unsigned int flags)
return IORESOURCE_MEM;
}
-/*
- * Find the extent of a PCI decode..
- */
-static u32 pci_size(u32 base, u32 maxbase, u32 mask)
+static u64 pci_size(u64 base, u64 maxbase, u64 mask)
{
- u32 size = mask & maxbase; /* Find the significant bits */
+ u64 size = mask & maxbase; /* Find the significant bits */
if (!size)
return 0;
@@ -184,135 +203,142 @@ static u32 pci_size(u32 base, u32 maxbase, u32 mask)
return size;
}
-static u64 pci_size64(u64 base, u64 maxbase, u64 mask)
+enum pci_bar_type {
+ pci_bar_unknown, /* Standard PCI BAR probe */
+ pci_bar_io, /* An io port BAR */
+ pci_bar_mem32, /* A 32-bit memory BAR */
+ pci_bar_mem64, /* A 64-bit memory BAR */
+};
+
+static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
{
- u64 size = mask & maxbase; /* Find the significant bits */
- if (!size)
- return 0;
+ if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
+ res->flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
+ return pci_bar_io;
+ }
- /* Get the lowest of them to find the decode size, and
- from that the extent. */
- size = (size & ~(size-1)) - 1;
+ res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
- /* base == maxbase can be valid only if the BAR has
- already been programmed with all 1s. */
- if (base == maxbase && ((base | size) & mask) != mask)
- return 0;
-
- return size;
+ if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64)
+ return pci_bar_mem64;
+ return pci_bar_mem32;
}
-static inline int is_64bit_memory(u32 mask)
+/*
+ * If the type is not unknown, we assume that the lowest bit is 'enable'.
+ * Returns 1 if the BAR was 64-bit and 0 if it was 32-bit.
+ */
+static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+ struct resource *res, unsigned int pos)
{
- if ((mask & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
- (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64))
- return 1;
- return 0;
-}
+ u32 l, sz, mask;
-static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
-{
- unsigned int pos, reg, next;
- u32 l, sz;
- struct resource *res;
+ mask = type ? ~PCI_ROM_ADDRESS_ENABLE : ~0;
- for(pos=0; pos<howmany; pos = next) {
- u64 l64;
- u64 sz64;
- u32 raw_sz;
+ res->name = pci_name(dev);
- next = pos+1;
- res = &dev->resource[pos];
- res->name = pci_name(dev);
- reg = PCI_BASE_ADDRESS_0 + (pos << 2);
- pci_read_config_dword(dev, reg, &l);
- pci_write_config_dword(dev, reg, ~0);
- pci_read_config_dword(dev, reg, &sz);
- pci_write_config_dword(dev, reg, l);
- if (!sz || sz == 0xffffffff)
- continue;
- if (l == 0xffffffff)
- l = 0;
- raw_sz = sz;
- if ((l & PCI_BASE_ADDRESS_SPACE) ==
- PCI_BASE_ADDRESS_SPACE_MEMORY) {
- sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK);
- /*
- * For 64bit prefetchable memory sz could be 0, if the
- * real size is bigger than 4G, so we need to check
- * szhi for that.
- */
- if (!is_64bit_memory(l) && !sz)
- continue;
- res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
- res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK;
+ pci_read_config_dword(dev, pos, &l);
+ pci_write_config_dword(dev, pos, mask);
+ pci_read_config_dword(dev, pos, &sz);
+ pci_write_config_dword(dev, pos, l);
+
+ /*
+ * All bits set in sz means the device isn't working properly.
+ * If the BAR isn't implemented, all bits must be 0. If it's a
+ * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
+ * 1 must be clear.
+ */
+ if (!sz || sz == 0xffffffff)
+ goto fail;
+
+ /*
+ * I don't know how l can have all bits set. Copied from old code.
+ * Maybe it fixes a bug on some ancient platform.
+ */
+ if (l == 0xffffffff)
+ l = 0;
+
+ if (type == pci_bar_unknown) {
+ type = decode_bar(res, l);
+ res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN;
+ if (type == pci_bar_io) {
+ l &= PCI_BASE_ADDRESS_IO_MASK;
+ mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff;
} else {
- sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff);
- if (!sz)
- continue;
- res->start = l & PCI_BASE_ADDRESS_IO_MASK;
- res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK;
+ l &= PCI_BASE_ADDRESS_MEM_MASK;
+ mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
}
- res->end = res->start + (unsigned long) sz;
- res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN;
- if (is_64bit_memory(l)) {
- u32 szhi, lhi;
-
- pci_read_config_dword(dev, reg+4, &lhi);
- pci_write_config_dword(dev, reg+4, ~0);
- pci_read_config_dword(dev, reg+4, &szhi);
- pci_write_config_dword(dev, reg+4, lhi);
- sz64 = ((u64)szhi << 32) | raw_sz;
- l64 = ((u64)lhi << 32) | l;
- sz64 = pci_size64(l64, sz64, PCI_BASE_ADDRESS_MEM_MASK);
- next++;
-#if BITS_PER_LONG == 64
- if (!sz64) {
- res->start = 0;
- res->end = 0;
- res->flags = 0;
- continue;
- }
- res->start = l64 & PCI_BASE_ADDRESS_MEM_MASK;
- res->end = res->start + sz64;
-#else
- if (sz64 > 0x100000000ULL) {
- dev_err(&dev->dev, "BAR %d: can't handle 64-bit"
- " BAR\n", pos);
- res->start = 0;
- res->flags = 0;
- } else if (lhi) {
- /* 64-bit wide address, treat as disabled */
- pci_write_config_dword(dev, reg,
- l & ~(u32)PCI_BASE_ADDRESS_MEM_MASK);
- pci_write_config_dword(dev, reg+4, 0);
- res->start = 0;
- res->end = sz;
- }
-#endif
+ } else {
+ res->flags |= (l & IORESOURCE_ROM_ENABLE);
+ l &= PCI_ROM_ADDRESS_MASK;
+ mask = (u32)PCI_ROM_ADDRESS_MASK;
+ }
+
+ if (type == pci_bar_mem64) {
+ u64 l64 = l;
+ u64 sz64 = sz;
+ u64 mask64 = mask | (u64)~0 << 32;
+
+ pci_read_config_dword(dev, pos + 4, &l);
+ pci_write_config_dword(dev, pos + 4, ~0);
+ pci_read_config_dword(dev, pos + 4, &sz);
+ pci_write_config_dword(dev, pos + 4, l);
+
+ l64 |= ((u64)l << 32);
+ sz64 |= ((u64)sz << 32);
+
+ sz64 = pci_size(l64, sz64, mask64);
+
+ if (!sz64)
+ goto fail;
+
+ if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) {
+ dev_err(&dev->dev, "can't handle 64-bit BAR\n");
+ goto fail;
+ } else if ((sizeof(resource_size_t) < 8) && l) {
+ /* Address above 32-bit boundary; disable the BAR */
+ pci_write_config_dword(dev, pos, 0);
+ pci_write_config_dword(dev, pos + 4, 0);
+ res->start = 0;
+ res->end = sz64;
+ } else {
+ res->start = l64;
+ res->end = l64 + sz64;
}
+ } else {
+ sz = pci_size(l, sz, mask);
+
+ if (!sz)
+ goto fail;
+
+ res->start = l;
+ res->end = l + sz;
}
+
+ out:
+ return (type == pci_bar_mem64) ? 1 : 0;
+ fail:
+ res->flags = 0;
+ goto out;
+}
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+{
+ unsigned int pos, reg;
+
+ for (pos = 0; pos < howmany; pos++) {
+ struct resource *res = &dev->resource[pos];
+ reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+ pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
+ }
+
if (rom) {
+ struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
dev->rom_base_reg = rom;
- res = &dev->resource[PCI_ROM_RESOURCE];
- res->name = pci_name(dev);
- pci_read_config_dword(dev, rom, &l);
- pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE);
- pci_read_config_dword(dev, rom, &sz);
- pci_write_config_dword(dev, rom, l);
- if (l == 0xffffffff)
- l = 0;
- if (sz && sz != 0xffffffff) {
- sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK);
- if (sz) {
- res->flags = (l & IORESOURCE_ROM_ENABLE) |
- IORESOURCE_MEM | IORESOURCE_PREFETCH |
- IORESOURCE_READONLY | IORESOURCE_CACHEABLE |
- IORESOURCE_SIZEALIGN;
- res->start = l & PCI_ROM_ADDRESS_MASK;
- res->end = res->start + (unsigned long) sz;
- }
- }
+ res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
+ IORESOURCE_READONLY | IORESOURCE_CACHEABLE |
+ IORESOURCE_SIZEALIGN;
+ __pci_read_base(dev, pci_bar_mem32, res, rom);
}
}
@@ -1053,7 +1079,8 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
}
}
- if (bus->self)
+ /* only one slot has pcie device */
+ if (bus->self && nr)
pcie_aspm_init_link_state(bus->self);
return nr;
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 4400dffbd93..e1098c302c4 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -88,7 +88,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
if ((pos & 3) && cnt > 2) {
unsigned short val;
pci_user_read_config_word(dev, pos, &val);
- __put_user(cpu_to_le16(val), (unsigned short __user *) buf);
+ __put_user(cpu_to_le16(val), (__le16 __user *) buf);
buf += 2;
pos += 2;
cnt -= 2;
@@ -97,7 +97,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
while (cnt >= 4) {
unsigned int val;
pci_user_read_config_dword(dev, pos, &val);
- __put_user(cpu_to_le32(val), (unsigned int __user *) buf);
+ __put_user(cpu_to_le32(val), (__le32 __user *) buf);
buf += 4;
pos += 4;
cnt -= 4;
@@ -106,7 +106,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
if (cnt >= 2) {
unsigned short val;
pci_user_read_config_word(dev, pos, &val);
- __put_user(cpu_to_le16(val), (unsigned short __user *) buf);
+ __put_user(cpu_to_le16(val), (__le16 __user *) buf);
buf += 2;
pos += 2;
cnt -= 2;
@@ -156,8 +156,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof
}
if ((pos & 3) && cnt > 2) {
- unsigned short val;
- __get_user(val, (unsigned short __user *) buf);
+ __le16 val;
+ __get_user(val, (__le16 __user *) buf);
pci_user_write_config_word(dev, pos, le16_to_cpu(val));
buf += 2;
pos += 2;
@@ -165,8 +165,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof
}
while (cnt >= 4) {
- unsigned int val;
- __get_user(val, (unsigned int __user *) buf);
+ __le32 val;
+ __get_user(val, (__le32 __user *) buf);
pci_user_write_config_dword(dev, pos, le32_to_cpu(val));
buf += 4;
pos += 4;
@@ -174,8 +174,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof
}
if (cnt >= 2) {
- unsigned short val;
- __get_user(val, (unsigned short __user *) buf);
+ __le16 val;
+ __get_user(val, (__le16 __user *) buf);
pci_user_write_config_word(dev, pos, le16_to_cpu(val));
buf += 2;
pos += 2;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 12d489395fa..9236e7f869c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -923,6 +923,19 @@ static void __init quirk_ide_samemode(struct pci_dev *pdev)
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode);
+/*
+ * Some ATA devices break if put into D3
+ */
+
+static void __devinit quirk_no_ata_d3(struct pci_dev *pdev)
+{
+ /* Quirk the legacy ATA devices only. The AHCI ones are ok */
+ if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE)
+ pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3);
+
/* This was originally an Alpha specific thing, but it really fits here.
* The i82375 PCI/EISA bridge appears as non-classified. Fix that.
*/
@@ -1743,9 +1756,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_c
*/
static void __devinit quirk_brcm_570x_limit_vpd(struct pci_dev *dev)
{
- /* Only disable the VPD capability for 5706, 5708, and 5709 rev. A */
+ /*
+ * Only disable the VPD capability for 5706, 5706S, 5708,
+ * 5708S and 5709 rev. A
+ */
if ((dev->device == PCI_DEVICE_ID_NX2_5706) ||
+ (dev->device == PCI_DEVICE_ID_NX2_5706S) ||
(dev->device == PCI_DEVICE_ID_NX2_5708) ||
+ (dev->device == PCI_DEVICE_ID_NX2_5708S) ||
((dev->device == PCI_DEVICE_ID_NX2_5709) &&
(dev->revision & 0xf0) == 0x0)) {
if (dev->vpd)