summaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/apic.c33
-rw-r--r--arch/x86/xen/debugfs.c104
-rw-r--r--arch/x86/xen/debugfs.h4
-rw-r--r--arch/x86/xen/enlighten.c15
-rw-r--r--arch/x86/xen/mmu.c27
-rw-r--r--arch/x86/xen/p2m.c140
-rw-r--r--arch/x86/xen/setup.c170
-rw-r--r--arch/x86/xen/smp.c131
-rw-r--r--arch/x86/xen/smp.h12
-rw-r--r--arch/x86/xen/spinlock.c12
-rw-r--r--arch/x86/xen/xen-asm_32.S6
-rw-r--r--arch/x86/xen/xen-ops.h5
13 files changed, 427 insertions, 234 deletions
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index add2c2d729c..96ab2c09cb6 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -20,5 +20,5 @@ obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
-obj-$(CONFIG_XEN_DOM0) += vga.o
+obj-$(CONFIG_XEN_DOM0) += apic.o vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
new file mode 100644
index 00000000000..ec57bd3818a
--- /dev/null
+++ b/arch/x86/xen/apic.c
@@ -0,0 +1,33 @@
+#include <linux/init.h>
+
+#include <asm/x86_init.h>
+#include <asm/apic.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/interface/physdev.h>
+
+unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
+{
+ struct physdev_apic apic_op;
+ int ret;
+
+ apic_op.apic_physbase = mpc_ioapic_addr(apic);
+ apic_op.reg = reg;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+ if (!ret)
+ return apic_op.value;
+
+ /* fallback to return an emulated IO_APIC values */
+ if (reg == 0x1)
+ return 0x00170020;
+ else if (reg == 0x0)
+ return apic << 24;
+
+ return 0xfd;
+}
+
+void __init xen_init_apic(void)
+{
+ x86_io_apic_ops.read = xen_io_apic_read;
+}
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index ef1db1900d8..c8377fb26cd 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -19,107 +19,3 @@ struct dentry * __init xen_init_debugfs(void)
return d_xen_debug;
}
-struct array_data
-{
- void *array;
- unsigned elements;
-};
-
-static int u32_array_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
- return nonseekable_open(inode, file);
-}
-
-static size_t format_array(char *buf, size_t bufsize, const char *fmt,
- u32 *array, unsigned array_size)
-{
- size_t ret = 0;
- unsigned i;
-
- for(i = 0; i < array_size; i++) {
- size_t len;
-
- len = snprintf(buf, bufsize, fmt, array[i]);
- len++; /* ' ' or '\n' */
- ret += len;
-
- if (buf) {
- buf += len;
- bufsize -= len;
- buf[-1] = (i == array_size-1) ? '\n' : ' ';
- }
- }
-
- ret++; /* \0 */
- if (buf)
- *buf = '\0';
-
- return ret;
-}
-
-static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
-{
- size_t len = format_array(NULL, 0, fmt, array, array_size);
- char *ret;
-
- ret = kmalloc(len, GFP_KERNEL);
- if (ret == NULL)
- return NULL;
-
- format_array(ret, len, fmt, array, array_size);
- return ret;
-}
-
-static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
- loff_t *ppos)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct array_data *data = inode->i_private;
- size_t size;
-
- if (*ppos == 0) {
- if (file->private_data) {
- kfree(file->private_data);
- file->private_data = NULL;
- }
-
- file->private_data = format_array_alloc("%u", data->array, data->elements);
- }
-
- size = 0;
- if (file->private_data)
- size = strlen(file->private_data);
-
- return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
-}
-
-static int xen_array_release(struct inode *inode, struct file *file)
-{
- kfree(file->private_data);
-
- return 0;
-}
-
-static const struct file_operations u32_array_fops = {
- .owner = THIS_MODULE,
- .open = u32_array_open,
- .release= xen_array_release,
- .read = u32_array_read,
- .llseek = no_llseek,
-};
-
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
- struct dentry *parent,
- u32 *array, unsigned elements)
-{
- struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
-
- if (data == NULL)
- return NULL;
-
- data->array = array;
- data->elements = elements;
-
- return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
-}
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
index 78d25499be5..12ebf3325c7 100644
--- a/arch/x86/xen/debugfs.h
+++ b/arch/x86/xen/debugfs.h
@@ -3,8 +3,4 @@
struct dentry * __init xen_init_debugfs(void);
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
- struct dentry *parent,
- u32 *array, unsigned elements);
-
#endif /* _XEN_DEBUGFS_H */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 97559a67400..c7cadd61022 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
#include <xen/page.h>
#include <xen/hvm.h>
#include <xen/hvc-console.h>
+#include <xen/acpi.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
@@ -75,6 +76,7 @@
#include "xen-ops.h"
#include "mmu.h"
+#include "smp.h"
#include "multicalls.h"
EXPORT_SYMBOL_GPL(hypercall_page);
@@ -891,6 +893,14 @@ static void set_xen_basic_apic_ops(void)
apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
apic->set_apic_id = xen_set_apic_id;
apic->get_apic_id = xen_get_apic_id;
+
+#ifdef CONFIG_SMP
+ apic->send_IPI_allbutself = xen_send_IPI_allbutself;
+ apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself;
+ apic->send_IPI_mask = xen_send_IPI_mask;
+ apic->send_IPI_all = xen_send_IPI_all;
+ apic->send_IPI_self = xen_send_IPI_self;
+#endif
}
#endif
@@ -1352,7 +1362,6 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
- xen_ident_map_ISA();
/* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list();
@@ -1408,8 +1417,12 @@ asmlinkage void __init xen_start_kernel(void)
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
+ xen_init_apic();
+
/* Make sure ACS will be enabled */
pci_request_acs();
+
+ xen_acpi_sleep_register();
}
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 69f5857660a..3a73785631c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1864,7 +1864,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
#endif /* CONFIG_X86_64 */
static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
-static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
@@ -1905,7 +1904,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
* We just don't map the IO APIC - all access is via
* hypercalls. Keep the address in the pte for reference.
*/
- pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
+ pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
break;
#endif
@@ -1934,29 +1933,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
-void __init xen_ident_map_ISA(void)
-{
- unsigned long pa;
-
- /*
- * If we're dom0, then linear map the ISA machine addresses into
- * the kernel's address space.
- */
- if (!xen_initial_domain())
- return;
-
- xen_raw_printk("Xen: setup ISA identity maps\n");
-
- for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) {
- pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
-
- if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
- BUG();
- }
-
- xen_flush_tlb();
-}
-
static void __init xen_post_allocator_init(void)
{
pv_mmu_ops.set_pte = xen_set_pte;
@@ -2070,7 +2046,6 @@ void __init xen_init_mmu_ops(void)
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
- memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 1b267e75158..64effdc6da9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -499,16 +499,18 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
-static bool __init __early_alloc_p2m(unsigned long pfn)
+static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary)
{
unsigned topidx, mididx, idx;
+ unsigned long *p2m;
+ unsigned long *mid_mfn_p;
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
/* Pfff.. No boundary cross-over, lets get out. */
- if (!idx)
+ if (!idx && check_boundary)
return false;
WARN(p2m_top[topidx][mididx] == p2m_identity,
@@ -522,24 +524,66 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
return false;
/* Boundary cross-over for the edges: */
- if (idx) {
- unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
- unsigned long *mid_mfn_p;
+ p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_init(p2m);
+ p2m_init(p2m);
- p2m_top[topidx][mididx] = p2m;
+ p2m_top[topidx][mididx] = p2m;
- /* For save/restore we need to MFN of the P2M saved */
-
- mid_mfn_p = p2m_top_mfn_p[topidx];
- WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
- "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
- topidx, mididx);
- mid_mfn_p[mididx] = virt_to_mfn(p2m);
+ /* For save/restore we need to MFN of the P2M saved */
+
+ mid_mfn_p = p2m_top_mfn_p[topidx];
+ WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
+ "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
+ topidx, mididx);
+ mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
+ return true;
+}
+
+static bool __init early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned long *mid_mfn_p;
+ unsigned long **mid;
+
+ mid = p2m_top[topidx];
+ mid_mfn_p = p2m_top_mfn_p[topidx];
+ if (mid == p2m_mid_missing) {
+ mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+ p2m_top[topidx] = mid;
+
+ BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+ }
+ /* And the save/restore P2M tables.. */
+ if (mid_mfn_p == p2m_mid_missing_mfn) {
+ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_mfn_init(mid_mfn_p);
+
+ p2m_top_mfn_p[topidx] = mid_mfn_p;
+ p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+ /* Note: we don't set mid_mfn_p[midix] here,
+ * look in early_alloc_p2m_middle */
}
- return idx != 0;
+ return true;
+}
+bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+ if (!early_alloc_p2m(pfn))
+ return false;
+
+ if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
+ return false;
+
+ if (!__set_phys_to_machine(pfn, mfn))
+ return false;
+ }
+
+ return true;
}
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e)
@@ -559,35 +603,11 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
{
- unsigned topidx = p2m_top_index(pfn);
- unsigned long *mid_mfn_p;
- unsigned long **mid;
-
- mid = p2m_top[topidx];
- mid_mfn_p = p2m_top_mfn_p[topidx];
- if (mid == p2m_mid_missing) {
- mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
- p2m_mid_init(mid);
-
- p2m_top[topidx] = mid;
-
- BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
- }
- /* And the save/restore P2M tables.. */
- if (mid_mfn_p == p2m_mid_missing_mfn) {
- mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p);
-
- p2m_top_mfn_p[topidx] = mid_mfn_p;
- p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
- /* Note: we don't set mid_mfn_p[midix] here,
- * look in __early_alloc_p2m */
- }
+ WARN_ON(!early_alloc_p2m(pfn));
}
- __early_alloc_p2m(pfn_s);
- __early_alloc_p2m(pfn_e);
+ early_alloc_p2m_middle(pfn_s, true);
+ early_alloc_p2m_middle(pfn_e, true);
for (pfn = pfn_s; pfn < pfn_e; pfn++)
if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
@@ -686,6 +706,7 @@ int m2p_add_override(unsigned long mfn, struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
+ int ret = 0;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
@@ -721,6 +742,24 @@ int m2p_add_override(unsigned long mfn, struct page *page,
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
+ /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
+ * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
+ * pfn so that the following mfn_to_pfn(mfn) calls will return the
+ * pfn from the m2p_override (the backend pfn) instead.
+ * We need to do this because the pages shared by the frontend
+ * (xen-blkfront) can be already locked (lock_page, called by
+ * do_read_cache_page); when the userspace backend tries to use them
+ * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+ * do_blockdev_direct_IO is going to try to lock the same pages
+ * again resulting in a deadlock.
+ * As a side effect get_user_pages_fast might not be safe on the
+ * frontend pages while they are being shared with the backend,
+ * because mfn_to_pfn (that ends up being called by GUPF) will
+ * return the backend pfn rather than the frontend pfn. */
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+ if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+ set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+
return 0;
}
EXPORT_SYMBOL_GPL(m2p_add_override);
@@ -732,6 +771,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
+ int ret = 0;
pfn = page_to_pfn(page);
mfn = get_phys_to_machine(pfn);
@@ -801,6 +841,22 @@ int m2p_remove_override(struct page *page, bool clear_pte)
} else
set_phys_to_machine(pfn, page->index);
+ /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
+ * somewhere in this domain, even before being added to the
+ * m2p_override (see comment above in m2p_add_override).
+ * If there are no other entries in the m2p_override corresponding
+ * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
+ * the original pfn (the one shared by the frontend): the backend
+ * cannot do any IO on this page anymore because it has been
+ * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
+ * the original pfn causes mfn_to_pfn(mfn) to return the frontend
+ * pfn again. */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+ if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+ m2p_find_override(mfn) == NULL)
+ set_phys_to_machine(pfn, mfn);
+
return 0;
}
EXPORT_SYMBOL_GPL(m2p_remove_override);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1ba8dff2675..a4790bf22c5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -26,7 +26,6 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
-
#include "xen-ops.h"
#include "vdso.h"
@@ -84,8 +83,8 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
-static unsigned long __init xen_release_chunk(unsigned long start,
- unsigned long end)
+static unsigned long __init xen_do_chunk(unsigned long start,
+ unsigned long end, bool release)
{
struct xen_memory_reservation reservation = {
.address_bits = 0,
@@ -96,30 +95,138 @@ static unsigned long __init xen_release_chunk(unsigned long start,
unsigned long pfn;
int ret;
- for(pfn = start; pfn < end; pfn++) {
+ for (pfn = start; pfn < end; pfn++) {
+ unsigned long frame;
unsigned long mfn = pfn_to_mfn(pfn);
- /* Make sure pfn exists to start with */
- if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
- continue;
-
- set_xen_guest_handle(reservation.extent_start, &mfn);
+ if (release) {
+ /* Make sure pfn exists to start with */
+ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+ continue;
+ frame = mfn;
+ } else {
+ if (mfn != INVALID_P2M_ENTRY)
+ continue;
+ frame = pfn;
+ }
+ set_xen_guest_handle(reservation.extent_start, &frame);
reservation.nr_extents = 1;
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
&reservation);
- WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
+ WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
+ release ? "release" : "populate", pfn, ret);
+
if (ret == 1) {
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
+ if (release)
+ break;
+ set_xen_guest_handle(reservation.extent_start, &frame);
+ reservation.nr_extents = 1;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ break;
+ }
len++;
- }
+ } else
+ break;
}
- printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
- start, end, len);
+ if (len)
+ printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
+ release ? "Freeing" : "Populating",
+ start, end, len,
+ release ? "freed" : "added");
return len;
}
+static unsigned long __init xen_release_chunk(unsigned long start,
+ unsigned long end)
+{
+ return xen_do_chunk(start, end, true);
+}
+
+static unsigned long __init xen_populate_chunk(
+ const struct e820entry *list, size_t map_size,
+ unsigned long max_pfn, unsigned long *last_pfn,
+ unsigned long credits_left)
+{
+ const struct e820entry *entry;
+ unsigned int i;
+ unsigned long done = 0;
+ unsigned long dest_pfn;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ unsigned long credits = credits_left;
+ unsigned long s_pfn;
+ unsigned long e_pfn;
+ unsigned long pfns;
+ long capacity;
+
+ if (credits <= 0)
+ break;
+
+ if (entry->type != E820_RAM)
+ continue;
+
+ e_pfn = PFN_UP(entry->addr + entry->size);
+
+ /* We only care about E820 after the xen_start_info->nr_pages */
+ if (e_pfn <= max_pfn)
+ continue;
+
+ s_pfn = PFN_DOWN(entry->addr);
+ /* If the E820 falls within the nr_pages, we want to start
+ * at the nr_pages PFN.
+ * If that would mean going past the E820 entry, skip it
+ */
+ if (s_pfn <= max_pfn) {
+ capacity = e_pfn - max_pfn;
+ dest_pfn = max_pfn;
+ } else {
+ /* last_pfn MUST be within E820_RAM regions */
+ if (*last_pfn && e_pfn >= *last_pfn)
+ s_pfn = *last_pfn;
+ capacity = e_pfn - s_pfn;
+ dest_pfn = s_pfn;
+ }
+ /* If we had filled this E820_RAM entry, go to the next one. */
+ if (capacity <= 0)
+ continue;
+
+ if (credits > capacity)
+ credits = capacity;
+
+ pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
+ done += pfns;
+ credits_left -= pfns;
+ *last_pfn = (dest_pfn + pfns);
+ }
+ return done;
+}
+
+static void __init xen_set_identity_and_release_chunk(
+ unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
+ unsigned long *released, unsigned long *identity)
+{
+ unsigned long pfn;
+
+ /*
+ * If the PFNs are currently mapped, the VA mapping also needs
+ * to be updated to be 1:1.
+ */
+ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+ (void)HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+ if (start_pfn < nr_pages)
+ *released += xen_release_chunk(
+ start_pfn, min(end_pfn, nr_pages));
+
+ *identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
@@ -142,7 +249,6 @@ static unsigned long __init xen_set_identity_and_release(
*/
for (i = 0, entry = list; i < map_size; i++, entry++) {
phys_addr_t end = entry->addr + entry->size;
-
if (entry->type == E820_RAM || i == map_size - 1) {
unsigned long start_pfn = PFN_DOWN(start);
unsigned long end_pfn = PFN_UP(end);
@@ -150,20 +256,19 @@ static unsigned long __init xen_set_identity_and_release(
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);
- if (start_pfn < end_pfn) {
- if (start_pfn < nr_pages)
- released += xen_release_chunk(
- start_pfn, min(end_pfn, nr_pages));
+ if (start_pfn < end_pfn)
+ xen_set_identity_and_release_chunk(
+ start_pfn, end_pfn, nr_pages,
+ &released, &identity);
- identity += set_phys_range_identity(
- start_pfn, end_pfn);
- }
start = end;
}
}
- printk(KERN_INFO "Released %lu pages of unused memory\n", released);
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+ if (released)
+ printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+ if (identity)
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
return released;
}
@@ -217,7 +322,9 @@ char * __init xen_memory_setup(void)
int rc;
struct xen_memory_map memmap;
unsigned long max_pages;
+ unsigned long last_pfn = 0;
unsigned long extra_pages = 0;
+ unsigned long populated;
int i;
int op;
@@ -257,8 +364,20 @@ char * __init xen_memory_setup(void)
*/
xen_released_pages = xen_set_identity_and_release(
map, memmap.nr_entries, max_pfn);
+
+ /*
+ * Populate back the non-RAM pages and E820 gaps that had been
+ * released. */
+ populated = xen_populate_chunk(map, memmap.nr_entries,
+ max_pfn, &last_pfn, xen_released_pages);
+
+ xen_released_pages -= populated;
extra_pages += xen_released_pages;
+ if (last_pfn > max_pfn) {
+ max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
+ mem_end = PFN_PHYS(max_pfn);
+ }
/*
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO
* factor the base size. On non-highmem systems, the base
@@ -272,7 +391,6 @@ char * __init xen_memory_setup(void)
*/
extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
extra_pages);
-
i = 0;
while (i < memmap.nr_entries) {
u64 addr = map[i].addr;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 0503c0c493a..afb250d22a6 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -16,6 +16,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
+#include <linux/irq_work.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
@@ -41,10 +42,12 @@ cpumask_var_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, xen_resched_irq);
static DEFINE_PER_CPU(int, xen_callfunc_irq);
static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
+static DEFINE_PER_CPU(int, xen_irq_work);
static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
+static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
/*
* Reschedule call back.
@@ -143,6 +146,17 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(xen_callfuncsingle_irq, cpu) = rc;
+ callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
+ rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
+ cpu,
+ xen_irq_work_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ callfunc_name,
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_irq_work, cpu) = rc;
+
return 0;
fail:
@@ -155,6 +169,8 @@ static int xen_smp_intr_init(unsigned int cpu)
if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
NULL);
+ if (per_cpu(xen_irq_work, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
return rc;
}
@@ -265,18 +281,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
set_cpu_possible(cpu, false);
}
- for_each_possible_cpu (cpu) {
- struct task_struct *idle;
-
- if (cpu == 0)
- continue;
-
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
-
+ for_each_possible_cpu(cpu)
set_cpu_present(cpu, true);
- }
}
static int __cpuinit
@@ -346,9 +352,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
}
-static int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
- struct task_struct *idle = idle_task(cpu);
int rc;
per_cpu(current_task, cpu) = idle;
@@ -418,6 +423,7 @@ static void xen_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
@@ -480,8 +486,8 @@ static void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-static void xen_send_IPI_mask(const struct cpumask *mask,
- enum ipi_vector vector)
+static void __xen_send_IPI_mask(const struct cpumask *mask,
+ int vector)
{
unsigned cpu;
@@ -493,7 +499,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
int cpu;
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+ __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
/* Make sure other vcpus get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
@@ -506,10 +512,86 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
static void xen_smp_send_call_function_single_ipi(int cpu)
{
- xen_send_IPI_mask(cpumask_of(cpu),
+ __xen_send_IPI_mask(cpumask_of(cpu),
XEN_CALL_FUNCTION_SINGLE_VECTOR);
}
+static inline int xen_map_vector(int vector)
+{
+ int xen_vector;
+
+ switch (vector) {
+ case RESCHEDULE_VECTOR:
+ xen_vector = XEN_RESCHEDULE_VECTOR;
+ break;
+ case CALL_FUNCTION_VECTOR:
+ xen_vector = XEN_CALL_FUNCTION_VECTOR;
+ break;
+ case CALL_FUNCTION_SINGLE_VECTOR:
+ xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR;
+ break;
+ case IRQ_WORK_VECTOR:
+ xen_vector = XEN_IRQ_WORK_VECTOR;
+ break;
+ default:
+ xen_vector = -1;
+ printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
+ vector);
+ }
+
+ return xen_vector;
+}
+
+void xen_send_IPI_mask(const struct cpumask *mask,
+ int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ __xen_send_IPI_mask(mask, xen_vector);
+}
+
+void xen_send_IPI_all(int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ __xen_send_IPI_mask(cpu_online_mask, xen_vector);
+}
+
+void xen_send_IPI_self(int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ xen_send_IPI_one(smp_processor_id(), xen_vector);
+}
+
+void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
+{
+ unsigned cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ if (!(num_online_cpus() > 1))
+ return;
+
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
+ if (this_cpu == cpu)
+ continue;
+
+ xen_smp_send_call_function_single_ipi(cpu);
+ }
+}
+
+void xen_send_IPI_allbutself(int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
+}
+
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
irq_enter();
@@ -530,6 +612,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
+{
+ irq_enter();
+ irq_work_run();
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_exit();
+
+ return IRQ_HANDLED;
+}
+
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
@@ -562,10 +654,10 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
xen_init_lock_cpu(0);
}
-static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc;
- rc = native_cpu_up(cpu);
+ rc = native_cpu_up(cpu, tidle);
WARN_ON (xen_smp_intr_init(cpu));
return rc;
}
@@ -576,6 +668,7 @@ static void xen_hvm_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
native_cpu_die(cpu);
}
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
new file mode 100644
index 00000000000..8981a76d081
--- /dev/null
+++ b/arch/x86/xen/smp.h
@@ -0,0 +1,12 @@
+#ifndef _XEN_SMP_H
+
+extern void xen_send_IPI_mask(const struct cpumask *mask,
+ int vector);
+extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector);
+extern void xen_send_IPI_allbutself(int vector);
+extern void physflat_send_IPI_allbutself(int vector);
+extern void xen_send_IPI_all(int vector);
+extern void xen_send_IPI_self(int vector);
+
+#endif
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d69cc6c3f80..83e866d714c 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -440,12 +440,12 @@ static int __init xen_spinlock_debugfs(void)
debugfs_create_u64("time_total", 0444, d_spin_debug,
&spinlock_stats.time_total);
- xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
- spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
- xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
- spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
- xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
- spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+ debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
+ debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
+ debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
return 0;
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index b040b0e518c..f9643fc50de 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -14,6 +14,7 @@
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <asm/asm.h>
#include <xen/interface/xen.h>
@@ -137,10 +138,7 @@ iret_restore_end:
1: iret
xen_iret_end_crit:
-.section __ex_table, "a"
- .align 4
- .long 1b, iret_exc
-.previous
+ _ASM_EXTABLE(1b, iret_exc)
hyper_iret:
/* put this out of line since its very rarely used */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b095739ccd4..202d4c15015 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -28,7 +28,6 @@ void xen_setup_shared_info(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
-void xen_ident_map_ISA(void);
void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;
@@ -92,11 +91,15 @@ struct dom0_vga_console_info;
#ifdef CONFIG_XEN_DOM0
void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+void __init xen_init_apic(void);
#else
static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
size_t size)
{
}
+static inline void __init xen_init_apic(void)
+{
+}
#endif
/* Declare an asm function, along with symbols needed to make it