diff options
Diffstat (limited to 'xen.pvops.patch')
-rw-r--r-- | xen.pvops.patch | 1062 |
1 files changed, 898 insertions, 164 deletions
diff --git a/xen.pvops.patch b/xen.pvops.patch index 6a8c1d4..62e411a 100644 --- a/xen.pvops.patch +++ b/xen.pvops.patch @@ -49,6 +49,19 @@ index 29a6ff8..81f9b94 100644 Settings for the IBM Calgary hardware IOMMU currently found in IBM pSeries and xSeries machines: +diff --git a/Makefile b/Makefile +index 6a29b82..83813cc 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 32 +-EXTRAVERSION = .23 ++EXTRAVERSION = .24 + NAME = Man-Eating Seals of Antiquity + + # *DOCUMENTATION* diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 8d3c79c..7d09a09 100644 --- a/arch/ia64/include/asm/dma-mapping.h @@ -669,10 +682,27 @@ index b399988..30cbf49 100644 extern void __init dmi_check_skip_isa_align(void); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index af6fd36..088f079 100644 +index af6fd36..430e3cc 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h -@@ -76,6 +76,11 @@ extern struct list_head pgd_list; +@@ -15,7 +15,6 @@ + : (prot)) + + #ifndef __ASSEMBLY__ +- + /* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. +@@ -26,6 +25,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; + extern spinlock_t pgd_lock; + extern struct list_head pgd_list; + ++extern struct mm_struct *pgd_page_get_mm(struct page *page); ++ + #ifdef CONFIG_PARAVIRT + #include <asm/paravirt.h> + #else /* !CONFIG_PARAVIRT */ +@@ -76,6 +77,11 @@ extern struct list_head pgd_list; #endif /* CONFIG_PARAVIRT */ @@ -684,7 +714,7 @@ index af6fd36..088f079 100644 /* * The following only work if pte_present() is true. * Undefined behaviour if not.. -@@ -397,6 +402,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) +@@ -397,6 +403,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) @@ -694,7 +724,7 @@ index af6fd36..088f079 100644 #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { -@@ -616,6 +624,9 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +@@ -616,6 +625,9 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) memcpy(dst, src, count * sizeof(pgd_t)); } @@ -1311,10 +1341,18 @@ index 0000000..e4fe299 + +#endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile -index d8e5d0c..6e80af9 100644 +index d8e5d0c..d4f3b05 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile -@@ -111,6 +111,7 @@ obj-$(CONFIG_X86_MRST) += mrst.o +@@ -11,6 +11,7 @@ ifdef CONFIG_FUNCTION_TRACER + CFLAGS_REMOVE_tsc.o = -pg + CFLAGS_REMOVE_rtc.o = -pg + CFLAGS_REMOVE_paravirt-spinlocks.o = -pg ++CFLAGS_REMOVE_pvclock.o = -pg + CFLAGS_REMOVE_ftrace.o = -pg + CFLAGS_REMOVE_early_printk.o = -pg + endif +@@ -111,6 +112,7 @@ obj-$(CONFIG_X86_MRST) += mrst.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o @@ -1642,7 +1680,7 @@ index 082089e..8d34362 100644 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c -index 1acd1c4..fbcfe26 100644 +index 0da6495..42d1fe2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -63,7 +63,12 @@ @@ -1755,6 +1793,20 @@ index 1acd1c4..fbcfe26 100644 return 0; } #endif +diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c +index 7ff61d6..d1e6e60 100644 +--- a/arch/x86/kernel/apic/nmi.c ++++ b/arch/x86/kernel/apic/nmi.c +@@ -558,6 +558,9 @@ void arch_trigger_all_cpu_backtrace(void) + { + int i; + ++ if (!cpu_has_apic) ++ return; ++ + cpumask_copy(&backtrace_mask, cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index f4361b5..404e458 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile @@ -3124,6 +3176,48 @@ index 06630d2..ad895ae 100644 obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o +diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c +index 1739358..e003b83 100644 +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -228,7 +228,16 @@ void vmalloc_sync_all(void) + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { +- if (!vmalloc_sync_one(page_address(page), address)) ++ spinlock_t *pgt_lock; ++ int ret; ++ ++ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; ++ ++ spin_lock(pgt_lock); ++ ret = vmalloc_sync_one(page_address(page), address); ++ spin_unlock(pgt_lock); ++ ++ if (!ret) + break; + } + spin_unlock_irqrestore(&pgd_lock, flags); +@@ -340,11 +349,19 @@ void vmalloc_sync_all(void) + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; ++ spinlock_t *pgt_lock; ++ + pgd = (pgd_t *)page_address(page) + pgd_index(address); ++ ++ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; ++ spin_lock(pgt_lock); ++ + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++ ++ spin_unlock(pgt_lock); + } + spin_unlock_irqrestore(&pgd_lock, flags); + } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bc..892b8eb 100644 --- a/arch/x86/mm/gup.c @@ -3221,7 +3315,7 @@ index e78cd0e..fb91994 100644 #ifdef CONFIG_STRICT_DEVMEM diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index c9ba9de..103e324 100644 +index c9ba9de..1fcc191 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -4,6 +4,9 @@ @@ -3251,7 +3345,49 @@ index c9ba9de..103e324 100644 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { return (pte_t *)__get_free_page(PGALLOC_GFP); -@@ -288,6 +301,12 @@ out: +@@ -86,7 +99,19 @@ static inline void pgd_list_del(pgd_t *pgd) + #define UNSHARED_PTRS_PER_PGD \ + (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) + +-static void pgd_ctor(pgd_t *pgd) ++ ++static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) ++{ ++ BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); ++ virt_to_page(pgd)->index = (pgoff_t)mm; ++} ++ ++struct mm_struct *pgd_page_get_mm(struct page *page) ++{ ++ return (struct mm_struct *)page->index; ++} ++ ++static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) + { + /* If the pgd points to a shared pagetable level (either the + ptes in non-PAE, or shared PMD in PAE), then just copy the +@@ -104,8 +129,10 @@ static void pgd_ctor(pgd_t *pgd) + } + + /* list required to sync kernel mapping updates */ +- if (!SHARED_KERNEL_PMD) ++ if (!SHARED_KERNEL_PMD) { ++ pgd_set_mm(pgd, mm); + pgd_list_add(pgd); ++ } + } + + static void pgd_dtor(pgd_t *pgd) +@@ -271,7 +298,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + */ + spin_lock_irqsave(&pgd_lock, flags); + +- pgd_ctor(pgd); ++ pgd_ctor(mm, pgd); + pgd_prepopulate_pmd(mm, pgd, pmds); + + spin_unlock_irqrestore(&pgd_lock, flags); +@@ -288,6 +315,12 @@ out: void pgd_free(struct mm_struct *mm, pgd_t *pgd) { @@ -3728,7 +3864,7 @@ index 0000000..21a3089 +#endif +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index 942ccf1..ea32198 100644 +index 942ccf1..fd3803e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -11,6 +11,7 @@ @@ -3882,7 +4018,7 @@ index 942ccf1..ea32198 100644 } asm(XEN_EMULATE_PREFIX "cpuid" -@@ -215,13 +242,15 @@ static __init void xen_init_cpuid_mask(void) +@@ -215,32 +242,18 @@ static __init void xen_init_cpuid_mask(void) unsigned int ax, bx, cx, dx; cpuid_leaf1_edx_mask = @@ -3901,8 +4037,28 @@ index 942ccf1..ea32198 100644 + (1 << X86_FEATURE_APIC) | /* disable local APIC */ (1 << X86_FEATURE_ACPI)); /* disable ACPI */ - ax = 1; -@@ -406,7 +435,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) +- ax = 1; +- cx = 0; +- xen_cpuid(&ax, &bx, &cx, &dx); +- +- /* cpuid claims we support xsave; try enabling it to see what happens */ +- if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { +- unsigned long cr4; +- +- set_in_cr4(X86_CR4_OSXSAVE); +- +- cr4 = read_cr4(); +- +- if ((cr4 & X86_CR4_OSXSAVE) == 0) +- cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); +- +- clear_in_cr4(X86_CR4_OSXSAVE); +- } ++ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); /* disable XSAVE */ + } + + static void xen_set_debugreg(int reg, unsigned long val) +@@ -406,7 +419,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) pte = pfn_pte(pfn, PAGE_KERNEL_RO); @@ -3911,7 +4067,7 @@ index 942ccf1..ea32198 100644 BUG(); frames[f] = mfn; -@@ -517,13 +546,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, +@@ -517,13 +530,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, return 0; #ifdef CONFIG_X86_MCE } else if (addr == (unsigned long)machine_check) { @@ -3931,7 +4087,7 @@ index 942ccf1..ea32198 100644 #endif /* CONFIG_X86_64 */ info->address = addr; -@@ -679,6 +708,18 @@ static void xen_set_iopl_mask(unsigned mask) +@@ -679,6 +692,18 @@ static void xen_set_iopl_mask(unsigned mask) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); } @@ -3950,7 +4106,7 @@ index 942ccf1..ea32198 100644 static void xen_io_delay(void) { } -@@ -716,7 +757,7 @@ static u32 xen_safe_apic_wait_icr_idle(void) +@@ -716,7 +741,7 @@ static u32 xen_safe_apic_wait_icr_idle(void) return 0; } @@ -3959,7 +4115,7 @@ index 942ccf1..ea32198 100644 { apic->read = xen_apic_read; apic->write = xen_apic_write; -@@ -728,7 +769,6 @@ static void set_xen_basic_apic_ops(void) +@@ -728,7 +753,6 @@ static void set_xen_basic_apic_ops(void) #endif @@ -3967,7 +4123,7 @@ index 942ccf1..ea32198 100644 static void xen_clts(void) { struct multicall_space mcs; -@@ -811,6 +851,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) +@@ -811,6 +835,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) Xen console noise. */ break; @@ -3979,7 +4135,7 @@ index 942ccf1..ea32198 100644 default: ret = native_write_msr_safe(msr, low, high); } -@@ -849,8 +894,6 @@ void xen_setup_vcpu_info_placement(void) +@@ -849,8 +878,6 @@ void xen_setup_vcpu_info_placement(void) /* xen_vcpu_setup managed to place the vcpu_info within the percpu area for all cpus, so make use of it */ if (have_vcpu_info_placement) { @@ -3988,7 +4144,7 @@ index 942ccf1..ea32198 100644 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); -@@ -923,10 +966,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { +@@ -923,10 +950,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, }; @@ -3999,7 +4155,7 @@ index 942ccf1..ea32198 100644 static const struct pv_cpu_ops xen_cpu_ops __initdata = { .cpuid = xen_cpuid, -@@ -978,6 +1017,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { +@@ -978,6 +1001,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .load_sp0 = xen_load_sp0, .set_iopl_mask = xen_set_iopl_mask, @@ -4007,7 +4163,7 @@ index 942ccf1..ea32198 100644 .io_delay = xen_io_delay, /* Xen takes care of %gs when switching to usermode for us */ -@@ -1020,15 +1060,40 @@ static void xen_machine_halt(void) +@@ -1020,15 +1044,40 @@ static void xen_machine_halt(void) xen_reboot(SHUTDOWN_poweroff); } @@ -4049,7 +4205,7 @@ index 942ccf1..ea32198 100644 .shutdown = xen_machine_halt, .crash_shutdown = xen_crash_shutdown, .emergency_restart = xen_emergency_restart, -@@ -1061,10 +1126,11 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1061,10 +1110,11 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; @@ -4062,7 +4218,7 @@ index 942ccf1..ea32198 100644 pv_cpu_ops = xen_cpu_ops; pv_apic_ops = xen_apic_ops; -@@ -1072,13 +1138,7 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1072,13 +1122,7 @@ asmlinkage void __init xen_start_kernel(void) x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; @@ -4077,7 +4233,7 @@ index 942ccf1..ea32198 100644 /* * Set up some pagetable state before starting to set any ptes. -@@ -1116,6 +1176,10 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1116,6 +1160,10 @@ asmlinkage void __init xen_start_kernel(void) */ xen_setup_stackprotector(); @@ -4088,7 +4244,7 @@ index 942ccf1..ea32198 100644 xen_init_irq_ops(); xen_init_cpuid_mask(); -@@ -1144,6 +1208,8 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1144,6 +1192,8 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; @@ -4097,7 +4253,7 @@ index 942ccf1..ea32198 100644 /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -@@ -1153,6 +1219,10 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1153,6 +1203,10 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); @@ -4108,7 +4264,7 @@ index 942ccf1..ea32198 100644 init_mm.pgd = pgd; -@@ -1162,6 +1232,14 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1162,6 +1216,14 @@ asmlinkage void __init xen_start_kernel(void) if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; @@ -4123,7 +4279,7 @@ index 942ccf1..ea32198 100644 /* set the limit of our address space */ xen_reserve_top(); -@@ -1184,6 +1262,16 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1184,6 +1246,16 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); @@ -4140,7 +4296,7 @@ index 942ccf1..ea32198 100644 } xen_raw_console_write("about to get started...\n"); -@@ -1197,3 +1285,126 @@ asmlinkage void __init xen_start_kernel(void) +@@ -1197,3 +1269,126 @@ asmlinkage void __init xen_start_kernel(void) x86_64_start_reservations((char *)__pa_symbol(&boot_params)); #endif } @@ -4268,7 +4424,7 @@ index 942ccf1..ea32198 100644 +} +#endif diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c -index 350a3de..c3364f8 100644 +index 350a3de..16a8e25 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,6 +42,7 @@ @@ -4354,7 +4510,7 @@ index 350a3de..c3364f8 100644 -#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) -+static unsigned long max_p2m_pfn __read_mostly; ++unsigned long xen_max_p2m_pfn __read_mostly; -/* Placeholder for holes in the address space */ -static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = @@ -4493,7 +4649,7 @@ index 350a3de..c3364f8 100644 - for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { - unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; - p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); -+ for (pfn = 0; pfn < max_p2m_pfn; pfn += P2M_PER_PAGE) { ++ for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx = p2m_mid_index(pfn); + unsigned long **mid; @@ -4536,7 +4692,7 @@ index 350a3de..c3364f8 100644 - virt_to_mfn(p2m_top_mfn_list); - HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; + virt_to_mfn(p2m_top_mfn); -+ HYPERVISOR_shared_info->arch.max_pfn = max_p2m_pfn; ++ HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; } /* Set up p2m_top to point to the domain-builder provided p2m pages */ @@ -4545,7 +4701,7 @@ index 350a3de..c3364f8 100644 unsigned pfn; - for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { -+ max_p2m_pfn = max_pfn; ++ xen_max_p2m_pfn = max_pfn; + + p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_init(p2m_missing); @@ -6019,7 +6175,7 @@ index 0000000..0f45638 +early_param("xen_emul_unplug", parse_xen_emul_unplug); +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c -index ad0047f..a0db643 100644 +index ad0047f..915b0c3 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -10,6 +10,7 @@ @@ -6040,28 +6196,41 @@ index ad0047f..a0db643 100644 #include <xen/features.h> #include "xen-ops.h" -@@ -32,25 +35,157 @@ extern void xen_sysenter_target(void); +@@ -32,25 +35,177 @@ extern void xen_sysenter_target(void); extern void xen_syscall_target(void); extern void xen_syscall32_target(void); +/* Amount of extra memory space we add to the e820 ranges */ +phys_addr_t xen_extra_mem_start, xen_extra_mem_size; + ++/* ++ * The maximum amount of extra memory compared to the base size. The ++ * main scaling factor is the size of struct page. At extreme ratios ++ * of base:extra, all the base memory can be filled with page ++ * structures for the extra memory, leaving no space for anything ++ * else. ++ * ++ * 10x seems like a reasonable balance between scaling flexibility and ++ * leaving a practically usable system. ++ */ ++#define EXTRA_MEM_RATIO (10) ++ +static __init void xen_add_extra_mem(unsigned long pages) +{ + u64 size = (u64)pages * PAGE_SIZE; ++ u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; + + if (!pages) + return; + -+ e820_add_region(xen_extra_mem_start + xen_extra_mem_size, size, E820_RAM); ++ e820_add_region(extra_start, size, E820_RAM); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + -+ reserve_early(xen_extra_mem_start + xen_extra_mem_size, -+ xen_extra_mem_start + xen_extra_mem_size + size, -+ "XEN EXTRA"); ++ reserve_early(extra_start, extra_start + size, "XEN EXTRA"); + + xen_extra_mem_size += size; ++ ++ xen_max_p2m_pfn = PFN_DOWN(extra_start + size); +} + +static unsigned long __init xen_release_chunk(phys_addr_t start_addr, @@ -6145,6 +6314,7 @@ index ad0047f..a0db643 100644 + int rc; + struct xen_memory_map memmap; + unsigned long extra_pages = 0; ++ unsigned long extra_limit; + int op; + int i; @@ -6170,22 +6340,27 @@ index ad0047f..a0db643 100644 + BUG_ON(rc); e820.nr_map = 0; -- -- e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); + xen_extra_mem_start = mem_end; + for (i = 0; i < memmap.nr_entries; i++) { + unsigned long long end = map[i].addr + map[i].size; + + if (map[i].type == E820_RAM) { -+ if (end > mem_end) { ++ if (map[i].addr < mem_end && end > mem_end) { + /* Truncate region to max_mem. */ -+ map[i].size -= end - mem_end; ++ u64 delta = end - mem_end; + -+ extra_pages += PFN_DOWN(end - mem_end); ++ map[i].size -= delta; ++ extra_pages += PFN_DOWN(delta); ++ ++ end = mem_end; + } -+ } else if (map[i].type != E820_RAM) ++ } + +- e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); ++ if (end > xen_extra_mem_start) + xen_extra_mem_start = end; + ++ /* If region is non-RAM or below mem_end, add what remains */ + if ((map[i].type != E820_RAM || map[i].addr < mem_end) && + map[i].size > 0) + e820_add_region(map[i].addr, map[i].size, map[i].type); @@ -6201,18 +6376,37 @@ index ad0047f..a0db643 100644 */ e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); -@@ -67,6 +202,10 @@ char * __init xen_memory_setup(void) +@@ -67,6 +222,29 @@ char * __init xen_memory_setup(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); + ++ /* ++ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO ++ * factor the base size. On non-highmem systems, the base ++ * size is the full initial memory allocation; on highmem it ++ * is limited to the max size of lowmem, so that it doesn't ++ * get completely filled. ++ * ++ * In principle there could be a problem in lowmem systems if ++ * the initial memory is also very large with respect to ++ * lowmem, but we won't try to deal with that here. ++ */ ++ extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), ++ max_pfn + extra_pages); ++ ++ if (extra_limit >= max_pfn) ++ extra_pages = extra_limit - max_pfn; ++ else ++ extra_pages = 0; ++ + xen_add_extra_mem(extra_pages); + return "Xen"; } -@@ -156,6 +295,8 @@ void __init xen_arch_setup(void) +@@ -156,6 +334,8 @@ void __init xen_arch_setup(void) struct physdev_set_iopl set_iopl; int rc; @@ -6221,7 +6415,7 @@ index ad0047f..a0db643 100644 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); -@@ -182,13 +323,17 @@ void __init xen_arch_setup(void) +@@ -182,13 +362,17 @@ void __init xen_arch_setup(void) } #endif @@ -6506,20 +6700,21 @@ index 0000000..1cd7f4d + } +} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h -index f9153a3..3bcdbed 100644 +index f9153a3..ebbee21 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h -@@ -30,6 +30,9 @@ void xen_setup_machphys_mapping(void); +@@ -30,6 +30,10 @@ void xen_setup_machphys_mapping(void); pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); void xen_reserve_top(void); +void xen_ident_map_ISA(void); ++extern unsigned long xen_max_p2m_pfn; + +void xen_set_pat(u64); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); -@@ -38,6 +41,10 @@ void xen_enable_sysenter(void); +@@ -38,6 +42,10 @@ void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); @@ -6530,7 +6725,7 @@ index f9153a3..3bcdbed 100644 void __init xen_build_dynamic_phys_to_machine(void); void xen_init_irq_ops(void); -@@ -46,11 +53,8 @@ void xen_setup_runstate_info(int cpu); +@@ -46,11 +54,8 @@ void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); @@ -6544,7 +6739,7 @@ index f9153a3..3bcdbed 100644 irqreturn_t xen_debug_interrupt(int irq, void *dev_id); -@@ -82,6 +86,23 @@ static inline void xen_uninit_lock_cpu(int cpu) +@@ -82,6 +87,23 @@ static inline void xen_uninit_lock_cpu(int cpu) } #endif @@ -6568,7 +6763,7 @@ index f9153a3..3bcdbed 100644 /* Declare an asm function, along with symbols needed to make it inlineable */ #define DECL_ASM(ret, name, ...) \ -@@ -101,4 +122,6 @@ void xen_sysret32(void); +@@ -101,4 +123,6 @@ void xen_sysret32(void); void xen_sysret64(void); void xen_adjust_exception_frame(void); @@ -6887,10 +7082,10 @@ index 40d395e..7ba143d 100644 acpi_status status = AE_OK; diff --git a/drivers/acpi/processor_xen.c b/drivers/acpi/processor_xen.c new file mode 100644 -index 0000000..2f37c9c +index 0000000..305398d --- /dev/null +++ b/drivers/acpi/processor_xen.c -@@ -0,0 +1,616 @@ +@@ -0,0 +1,651 @@ +/* + * processor_xen.c - ACPI Processor Driver for xen + * @@ -6932,6 +7127,7 @@ index 0000000..2f37c9c +#include <acpi/acpi_drivers.h> +#include <acpi/processor.h> +#include <xen/acpi.h> ++#include <xen/pcpu.h> + +#define PREFIX "ACPI: " + @@ -6975,6 +7171,42 @@ index 0000000..2f37c9c + }, +}; + ++static int is_processor_present(acpi_handle handle) ++{ ++ acpi_status status; ++ unsigned long long sta = 0; ++ ++ ++ status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); ++ ++ if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT)) ++ return 1; ++ ++ /* ++ * _STA is mandatory for a processor that supports hot plug ++ */ ++ if (status == AE_NOT_FOUND) ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, ++ "Processor does not support hot plug\n")); ++ else ++ ACPI_EXCEPTION((AE_INFO, status, ++ "Processor Device is not present")); ++ return 0; ++} ++ ++static acpi_status ++xen_acpi_processor_hotadd_init(struct acpi_processor *pr, int *p_cpu) ++{ ++ if (!is_processor_present(pr->handle)) ++ return AE_ERROR; ++ ++ if (processor_cntl_xen_notify(pr, ++ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD)) ++ return AE_ERROR; ++ ++ return AE_OK; ++} ++ +static int xen_acpi_processor_get_info(struct acpi_device *device) +{ + acpi_status status = 0; @@ -7057,14 +7289,12 @@ index 0000000..2f37c9c + * They should be ignored _iff they are physically not present. + * + */ -+#if 0 -+ if (pr->id == -1) { ++ if (xen_pcpu_index(pr->acpi_id, 1) == -1) { + if (ACPI_FAILURE -+ (acpi_processor_hotadd_init(pr->handle, &pr->id))) { ++ (xen_acpi_processor_hotadd_init(pr, &pr->id))) { + return -ENODEV; + } + } -+#endif + + /* + * On some boxes several processors use the same processor bus id. @@ -9277,7 +9507,7 @@ index b2f71f7..b7feb84 100644 help The network device frontend driver allows the kernel to diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c -index 1a11d95..aa9130b 100644 +index 1a11d95..3f71199 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -42,6 +42,7 @@ @@ -9504,9 +9734,9 @@ index 1a11d95..aa9130b 100644 + np->smart_poll.feature_smart_poll = 0; + } + ++ hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); + if (np->smart_poll.feature_smart_poll) { -+ hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); + np->smart_poll.timer.function = smart_poll_function; + np->smart_poll.netdev = dev; + np->smart_poll.smart_poll_freq = DEFAULT_SMART_POLL_FREQ; @@ -9654,10 +9884,10 @@ index 5753036..8e6e6d1 100644 early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c -index 2498602..fd89530 100644 +index ba83495..1506d4a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c -@@ -3282,7 +3282,7 @@ int __init intel_iommu_init(void) +@@ -3278,7 +3278,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ @@ -9666,7 +9896,7 @@ index 2498602..fd89530 100644 return -ENODEV; iommu_init_mempool(); -@@ -3303,7 +3303,9 @@ int __init intel_iommu_init(void) +@@ -3299,7 +3299,9 @@ int __init intel_iommu_init(void) "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); init_timer(&unmap_timer); @@ -9704,7 +9934,7 @@ index e03fe98..f9db891 100644 { int pos; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c -index f9cf317..a77a46f 100644 +index 0fb1d05..c7e8a69 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -19,6 +19,9 @@ @@ -9717,7 +9947,7 @@ index f9cf317..a77a46f 100644 #include "pci.h" #include "msi.h" -@@ -366,6 +369,20 @@ static void __pci_restore_msix_state(struct pci_dev *dev) +@@ -391,6 +394,20 @@ static void __pci_restore_msix_state(struct pci_dev *dev) void pci_restore_msi_state(struct pci_dev *dev) { @@ -11298,6 +11528,406 @@ index 54cd916..dc72563 100644 return -ENODEV; /* Nothing to do if running in dom0. */ +diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig +index 3711b88..4fcb4c5 100644 +--- a/drivers/watchdog/Kconfig ++++ b/drivers/watchdog/Kconfig +@@ -975,6 +975,16 @@ config WATCHDOG_RIO + + # XTENSA Architecture + ++# Xen Architecture ++ ++config XEN_WDT ++ tristate "Xen Watchdog support" ++ depends on XEN ++ help ++ Say Y here to support the hypervisor watchdog capability provided ++ by Xen 4.0 and newer. The watchdog timeout period is normally one ++ minute but can be changed with a boot-time parameter. ++ + # + # ISA-based Watchdog Cards + # +diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile +index 699199b..2f6739a 100644 +--- a/drivers/watchdog/Makefile ++++ b/drivers/watchdog/Makefile +@@ -141,6 +141,9 @@ obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o + + # XTENSA Architecture + ++# Xen ++obj-$(CONFIG_XEN_WDT) += xen_wdt.o ++ + # Architecture Independant + obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o + obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o +diff --git a/drivers/watchdog/xen_wdt.c b/drivers/watchdog/xen_wdt.c +new file mode 100644 +index 0000000..bcfaafb +--- /dev/null ++++ b/drivers/watchdog/xen_wdt.c +@@ -0,0 +1,359 @@ ++/* ++ * Xen Watchdog Driver ++ * ++ * (c) Copyright 2010 Novell, Inc. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++#define DRV_NAME "wdt" ++#define DRV_VERSION "0.01" ++#define PFX DRV_NAME ": " ++ ++#include <linux/bug.h> ++#include <linux/errno.h> ++#include <linux/fs.h> ++#include <linux/hrtimer.h> ++#include <linux/kernel.h> ++#include <linux/ktime.h> ++#include <linux/init.h> ++#include <linux/miscdevice.h> ++#include <linux/module.h> ++#include <linux/moduleparam.h> ++#include <linux/platform_device.h> ++#include <linux/spinlock.h> ++#include <linux/uaccess.h> ++#include <linux/watchdog.h> ++#include <xen/xen.h> ++#include <asm/xen/hypercall.h> ++#include <xen/interface/sched.h> ++ ++static struct platform_device *platform_device; ++static DEFINE_SPINLOCK(wdt_lock); ++static struct sched_watchdog wdt; ++static __kernel_time_t wdt_expires; ++static bool is_active, expect_release; ++ ++#define WATCHDOG_TIMEOUT 60 /* in seconds */ ++static unsigned int timeout = WATCHDOG_TIMEOUT; ++module_param(timeout, uint, S_IRUGO); ++MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds " ++ "(default=" __MODULE_STRING(WATCHDOG_TIMEOUT) ")"); ++ ++static bool nowayout = WATCHDOG_NOWAYOUT; ++module_param(nowayout, bool, S_IRUGO); ++MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " ++ "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); ++ ++static inline __kernel_time_t set_timeout(void) ++{ ++ wdt.timeout = timeout; ++ return ktime_to_timespec(ktime_get()).tv_sec + timeout; ++} ++ ++static int xen_wdt_start(void) ++{ ++ __kernel_time_t expires; ++ int err; ++ ++ spin_lock(&wdt_lock); ++ ++ expires = set_timeout(); ++ if (!wdt.id) ++ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt); ++ else ++ err = -EBUSY; ++ if (err > 0) { ++ wdt.id = err; ++ wdt_expires = expires; ++ err = 0; ++ } else ++ BUG_ON(!err); ++ ++ spin_unlock(&wdt_lock); ++ ++ return err; ++} ++ ++static int xen_wdt_stop(void) ++{ ++ int err = 0; ++ ++ spin_lock(&wdt_lock); ++ ++ wdt.timeout = 0; ++ if (wdt.id) ++ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt); ++ if (!err) ++ wdt.id = 0; ++ ++ spin_unlock(&wdt_lock); ++ ++ return err; ++} ++ ++static int xen_wdt_kick(void) ++{ ++ __kernel_time_t expires; ++ int err; ++ ++ spin_lock(&wdt_lock); ++ ++ expires = set_timeout(); ++ if (wdt.id) ++ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt); ++ else ++ err = -ENXIO; ++ if (!err) ++ wdt_expires = expires; ++ ++ spin_unlock(&wdt_lock); ++ ++ return err; ++} ++ ++static int xen_wdt_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ ++ /* /dev/watchdog can only be opened once */ ++ if (xchg(&is_active, true)) ++ return -EBUSY; ++ ++ err = xen_wdt_start(); ++ if (err == -EBUSY) ++ err = xen_wdt_kick(); ++ return err ?: nonseekable_open(inode, file); ++} ++ ++static int xen_wdt_release(struct inode *inode, struct file *file) ++{ ++ if (expect_release) ++ xen_wdt_stop(); ++ else { ++ printk(KERN_CRIT PFX ++ "unexpected close, not stopping watchdog!\n"); ++ xen_wdt_kick(); ++ } ++ is_active = false; ++ expect_release = false; ++ return 0; ++} ++ ++static ssize_t xen_wdt_write(struct file *file, const char __user *data, ++ size_t len, loff_t *ppos) ++{ ++ /* See if we got the magic character 'V' and reload the timer */ ++ if (len) { ++ if (!nowayout) { ++ size_t i; ++ ++ /* in case it was set long ago */ ++ expect_release = false; ++ ++ /* scan to see whether or not we got the magic ++ character */ ++ for (i = 0; i != len; i++) { ++ char c; ++ if (get_user(c, data + i)) ++ return -EFAULT; ++ if (c == 'V') ++ expect_release = true; ++ } ++ } ++ ++ /* someone wrote to us, we should reload the timer */ ++ xen_wdt_kick(); ++ } ++ return len; ++} ++ ++static long xen_wdt_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int new_options, retval = -EINVAL; ++ int new_timeout; ++ int __user *argp = (void __user *)arg; ++ static const struct watchdog_info ident = { ++ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, ++ .firmware_version = 0, ++ .identity = DRV_NAME, ++ }; ++ ++ switch (cmd) { ++ case WDIOC_GETSUPPORT: ++ return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; ++ ++ case WDIOC_GETSTATUS: ++ case WDIOC_GETBOOTSTATUS: ++ return put_user(0, argp); ++ ++ case WDIOC_SETOPTIONS: ++ if (get_user(new_options, argp)) ++ return -EFAULT; ++ ++ if (new_options & WDIOS_DISABLECARD) ++ retval = xen_wdt_stop(); ++ if (new_options & WDIOS_ENABLECARD) { ++ retval = xen_wdt_start(); ++ if (retval == -EBUSY) ++ retval = xen_wdt_kick(); ++ } ++ return retval; ++ ++ case WDIOC_KEEPALIVE: ++ xen_wdt_kick(); ++ return 0; ++ ++ case WDIOC_SETTIMEOUT: ++ if (get_user(new_timeout, argp)) ++ return -EFAULT; ++ if (!new_timeout) ++ return -EINVAL; ++ timeout = new_timeout; ++ xen_wdt_kick(); ++ /* fall through */ ++ case WDIOC_GETTIMEOUT: ++ return put_user(timeout, argp); ++ ++ case WDIOC_GETTIMELEFT: ++ retval = wdt_expires - ktime_to_timespec(ktime_get()).tv_sec; ++ return put_user(retval, argp); ++ } ++ ++ return -ENOTTY; ++} ++ ++static const struct file_operations xen_wdt_fops = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .write = xen_wdt_write, ++ .unlocked_ioctl = xen_wdt_ioctl, ++ .open = xen_wdt_open, ++ .release = xen_wdt_release, ++}; ++ ++static struct miscdevice xen_wdt_miscdev = { ++ .minor = WATCHDOG_MINOR, ++ .name = "watchdog", ++ .fops = &xen_wdt_fops, ++}; ++ ++static int __devinit xen_wdt_probe(struct platform_device *dev) ++{ ++ struct sched_watchdog wd = { .id = ~0 }; ++ int ret = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd); ++ ++ switch (ret) { ++ case -EINVAL: ++ if (!timeout) { ++ timeout = WATCHDOG_TIMEOUT; ++ printk(KERN_INFO PFX ++ "timeout value invalid, using %d\n", timeout); ++ } ++ ++ ret = misc_register(&xen_wdt_miscdev); ++ if (ret) { ++ printk(KERN_ERR PFX ++ "cannot register miscdev on minor=%d (%d)\n", ++ WATCHDOG_MINOR, ret); ++ break; ++ } ++ ++ printk(KERN_INFO PFX ++ "initialized (timeout=%ds, nowayout=%d)\n", ++ timeout, nowayout); ++ break; ++ ++ case -ENOSYS: ++ printk(KERN_INFO PFX "not supported\n"); ++ ret = -ENODEV; ++ break; ++ ++ default: ++ printk(KERN_INFO PFX "bogus return value %d\n", ret); ++ break; ++ } ++ ++ return ret; ++} ++ ++static int __devexit xen_wdt_remove(struct platform_device *dev) ++{ ++ /* Stop the timer before we leave */ ++ if (!nowayout) ++ xen_wdt_stop(); ++ ++ misc_deregister(&xen_wdt_miscdev); ++ ++ return 0; ++} ++ ++static void xen_wdt_shutdown(struct platform_device *dev) ++{ ++ xen_wdt_stop(); ++} ++ ++static int xen_wdt_suspend(struct platform_device *dev, pm_message_t state) ++{ ++ return xen_wdt_stop(); ++} ++ ++static int xen_wdt_resume(struct platform_device *dev) ++{ ++ return xen_wdt_start(); ++} ++ ++static struct platform_driver xen_wdt_driver = { ++ .probe = xen_wdt_probe, ++ .remove = __devexit_p(xen_wdt_remove), ++ .shutdown = xen_wdt_shutdown, ++ .suspend = xen_wdt_suspend, ++ .resume = xen_wdt_resume, ++ .driver = { ++ .owner = THIS_MODULE, ++ .name = DRV_NAME, ++ }, ++}; ++ ++static int __init xen_wdt_init_module(void) ++{ ++ int err; ++ ++ if (!xen_domain()) ++ return -ENODEV; ++ ++ printk(KERN_INFO PFX "Xen WatchDog Timer Driver v%s\n", DRV_VERSION); ++ ++ err = platform_driver_register(&xen_wdt_driver); ++ if (err) ++ return err; ++ ++ platform_device = platform_device_register_simple(DRV_NAME, ++ -1, NULL, 0); ++ if (IS_ERR(platform_device)) { ++ err = PTR_ERR(platform_device); ++ platform_driver_unregister(&xen_wdt_driver); ++ } ++ ++ return err; ++} ++ ++static void __exit xen_wdt_cleanup_module(void) ++{ ++ platform_device_unregister(platform_device); ++ platform_driver_unregister(&xen_wdt_driver); ++ printk(KERN_INFO PFX "module unloaded\n"); ++} ++ ++module_init(xen_wdt_init_module); ++module_exit(xen_wdt_cleanup_module); ++ ++MODULE_AUTHOR("Jen Beulich <jbeulich@novell.com>"); ++MODULE_DESCRIPTION("Xen WatchDog Timer Driver"); ++MODULE_VERSION(DRV_VERSION); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index cab100a..fa9982e 100644 --- a/drivers/xen/Kconfig @@ -11946,10 +12576,10 @@ index 0000000..e83b615 +subsys_initcall(xen_acpi_processor_extcntl_init); +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c -index 4204336..b76245c 100644 +index 4204336..ce198b4 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c -@@ -43,6 +43,7 @@ +@@ -43,22 +43,26 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/sysdev.h> @@ -11957,7 +12587,10 @@ index 4204336..b76245c 100644 #include <asm/page.h> #include <asm/pgalloc.h> -@@ -52,13 +53,15 @@ + #include <asm/pgtable.h> + #include <asm/uaccess.h> + #include <asm/tlb.h> ++#include <asm/e820.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -11974,7 +12607,7 @@ index 4204336..b76245c 100644 #define BALLOON_CLASS_NAME "xen_memory" -@@ -82,14 +85,15 @@ static struct sys_device balloon_sysdev; +@@ -82,14 +86,15 @@ static struct sys_device balloon_sysdev; static int register_balloon(struct sys_device *sysdev); @@ -11996,7 +12629,7 @@ index 4204336..b76245c 100644 /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; -@@ -118,12 +122,43 @@ static struct timer_list balloon_timer; +@@ -118,12 +123,43 @@ static struct timer_list balloon_timer; static void scrub_page(struct page *page) { #ifdef CONFIG_XEN_SCRUB_PAGES @@ -12042,7 +12675,7 @@ index 4204336..b76245c 100644 { /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { -@@ -134,7 +169,11 @@ static void balloon_append(struct page *page) +@@ -134,7 +170,11 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } @@ -12054,7 +12687,7 @@ index 4204336..b76245c 100644 totalram_pages--; } -@@ -195,20 +234,17 @@ static unsigned long current_target(void) +@@ -195,20 +235,17 @@ static unsigned long current_target(void) static int increase_reservation(unsigned long nr_pages) { @@ -12076,7 +12709,7 @@ index 4204336..b76245c 100644 page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { BUG_ON(page == NULL); -@@ -218,6 +254,8 @@ static int increase_reservation(unsigned long nr_pages) +@@ -218,6 +255,8 @@ static int increase_reservation(unsigned long nr_pages) set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; @@ -12085,7 +12718,7 @@ index 4204336..b76245c 100644 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); if (rc < 0) goto out; -@@ -227,19 +265,22 @@ static int increase_reservation(unsigned long nr_pages) +@@ -227,19 +266,22 @@ static int increase_reservation(unsigned long nr_pages) BUG_ON(page == NULL); pfn = page_to_pfn(page); @@ -12118,7 +12751,7 @@ index 4204336..b76245c 100644 } /* Relinquish the page back to the allocator. */ -@@ -251,20 +292,18 @@ static int increase_reservation(unsigned long nr_pages) +@@ -251,20 +293,18 @@ static int increase_reservation(unsigned long nr_pages) balloon_stats.current_pages += rc; out: @@ -12143,7 +12776,7 @@ index 4204336..b76245c 100644 .domid = DOMID_SELF }; -@@ -272,7 +311,7 @@ static int decrease_reservation(unsigned long nr_pages) +@@ -272,7 +312,7 @@ static int decrease_reservation(unsigned long nr_pages) nr_pages = ARRAY_SIZE(frame_list); for (i = 0; i < nr_pages; i++) { @@ -12152,7 +12785,7 @@ index 4204336..b76245c 100644 nr_pages = i; need_sleep = 1; break; -@@ -282,38 +321,49 @@ static int decrease_reservation(unsigned long nr_pages) +@@ -282,38 +322,49 @@ static int decrease_reservation(unsigned long nr_pages) frame_list[i] = pfn_to_mfn(pfn); scrub_page(page); @@ -12218,7 +12851,7 @@ index 4204336..b76245c 100644 return need_sleep; } -@@ -379,7 +429,7 @@ static void watch_target(struct xenbus_watch *watch, +@@ -379,7 +430,7 @@ static void watch_target(struct xenbus_watch *watch, /* The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ @@ -12227,7 +12860,14 @@ index 4204336..b76245c 100644 } static int balloon_init_watcher(struct notifier_block *notifier, -@@ -405,9 +455,12 @@ static int __init balloon_init(void) +@@ -399,15 +450,18 @@ static struct notifier_block xenstore_notifier; + + static int __init balloon_init(void) + { +- unsigned long pfn; ++ unsigned long pfn, extra_pfn_end; + struct page *page; + if (!xen_pv_domain()) return -ENODEV; @@ -12242,13 +12882,15 @@ index 4204336..b76245c 100644 balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; -@@ -420,10 +473,13 @@ static int __init balloon_init(void) +@@ -420,10 +474,15 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { ++ extra_pfn_end = min(e820_end_of_ram_pfn(), ++ (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); + for (pfn = PFN_UP(xen_extra_mem_start); -+ pfn < PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size); ++ pfn < extra_pfn_end; + pfn += balloon_npages) { page = pfn_to_page(pfn); - if (!PageReserved(page)) @@ -12259,7 +12901,7 @@ index 4204336..b76245c 100644 } target_watch.callback = watch_target; -@@ -444,6 +500,121 @@ static void balloon_exit(void) +@@ -444,6 +503,121 @@ static void balloon_exit(void) module_exit(balloon_exit); @@ -12381,7 +13023,7 @@ index 4204336..b76245c 100644 #define BALLOON_SHOW(name, format, args...) \ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, \ -@@ -477,7 +648,7 @@ static ssize_t store_target_kb(struct sys_device *dev, +@@ -477,7 +651,7 @@ static ssize_t store_target_kb(struct sys_device *dev, target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; @@ -12390,7 +13032,7 @@ index 4204336..b76245c 100644 return count; } -@@ -491,7 +662,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr +@@ -491,7 +665,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr { return sprintf(buf, "%llu\n", (unsigned long long)balloon_stats.target_pages @@ -12399,7 +13041,7 @@ index 4204336..b76245c 100644 } static ssize_t store_target(struct sys_device *dev, -@@ -507,7 +678,7 @@ static ssize_t store_target(struct sys_device *dev, +@@ -507,7 +681,7 @@ static ssize_t store_target(struct sys_device *dev, target_bytes = memparse(buf, &endchar); @@ -16905,7 +17547,7 @@ index bdfd584..6625ffe 100644 #include <asm/xen/hypervisor.h> diff --git a/drivers/xen/events.c b/drivers/xen/events.c -index 30e0467..6b6f563 100644 +index a4dc7bf..4f64072 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -16,7 +16,7 @@ @@ -16996,16 +17638,15 @@ index 30e0467..6b6f563 100644 static inline unsigned long *cpu_evtchn_mask(int cpu) { return cpu_evtchn_mask_p[cpu].bits; -@@ -106,6 +130,8 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) - #define VALID_EVTCHN(chn) ((chn) != 0) +@@ -107,6 +131,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) static struct irq_chip xen_dynamic_chip; -+static struct irq_chip xen_percpu_chip; + static struct irq_chip xen_percpu_chip; +static struct irq_chip xen_pirq_chip; /* Constructor for packed IRQ information. */ static struct irq_info mk_unbound_info(void) -@@ -135,7 +161,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, +@@ -136,7 +161,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short gsi, unsigned short vector) { return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, @@ -17015,7 +17656,7 @@ index 30e0467..6b6f563 100644 } /* -@@ -218,6 +245,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) +@@ -219,6 +245,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) return ret; } @@ -17031,7 +17672,7 @@ index 30e0467..6b6f563 100644 static inline unsigned long active_evtchns(unsigned int cpu, struct shared_info *sh, unsigned int idx) -@@ -299,6 +335,14 @@ static void mask_evtchn(int port) +@@ -300,6 +335,14 @@ static void mask_evtchn(int port) sync_set_bit(port, &s->evtchn_mask[0]); } @@ -17046,7 +17687,7 @@ index 30e0467..6b6f563 100644 static void unmask_evtchn(int port) { struct shared_info *s = HYPERVISOR_shared_info; -@@ -329,26 +373,370 @@ static void unmask_evtchn(int port) +@@ -330,27 +373,371 @@ static void unmask_evtchn(int port) put_cpu(); } @@ -17104,8 +17745,8 @@ index 30e0467..6b6f563 100644 - dynamic_irq_init(irq); + dynamic_irq_init_keep_chip_data(irq); -+ -+ return irq; + + return irq; + +no_irqs: + panic("No available IRQ to bind to: increase nr_irqs!\n"); @@ -17132,8 +17773,8 @@ index 30e0467..6b6f563 100644 + int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + WARN_ON(rc); + } -+} -+ + } + +static void pirq_query_unmask(int irq) +{ + struct physdev_irq_status_query irq_status; @@ -17336,7 +17977,7 @@ index 30e0467..6b6f563 100644 + } + } + irq_info[irq] = mk_unbound_info(); - ++ + dynamic_irq_cleanup(irq); + +out: @@ -17403,8 +18044,8 @@ index 30e0467..6b6f563 100644 + +out: + spin_unlock(&irq_mapping_update_lock); - return irq; - } ++ return irq; ++} +#endif +#endif + @@ -17418,30 +18059,20 @@ index 30e0467..6b6f563 100644 + return gsi_from_irq(irq); +} +EXPORT_SYMBOL_GPL(xen_gsi_from_irq); - ++ int bind_evtchn_to_irq(unsigned int evtchn) { -@@ -362,7 +750,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) + int irq; +@@ -363,7 +750,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) irq = find_unbound_irq(); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, -- handle_level_irq, "event"); +- handle_edge_irq, "event"); + handle_fasteoi_irq, "event"); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_evtchn_info(evtchn); -@@ -388,8 +776,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) - if (irq < 0) - goto out; - -- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, -- handle_level_irq, "ipi"); -+ set_irq_chip_and_handler_name(irq, &xen_percpu_chip, -+ handle_percpu_irq, "ipi"); - - bind_ipi.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, -@@ -409,8 +797,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +@@ -410,8 +797,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } @@ -17450,8 +18081,7 @@ index 30e0467..6b6f563 100644 +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; - --static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) ++ + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + @@ -17461,23 +18091,37 @@ index 30e0467..6b6f563 100644 + return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); +} + -+ + +-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; int evtchn, irq; -@@ -429,8 +832,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +@@ -421,6 +823,11 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) + irq = per_cpu(virq_to_irq, cpu)[virq]; - irq = find_unbound_irq(); - -- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, -- handle_level_irq, "virq"); + if (irq == -1) { ++ irq = find_unbound_irq(); ++ + set_irq_chip_and_handler_name(irq, &xen_percpu_chip, + handle_percpu_irq, "virq"); ++ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, +@@ -428,11 +835,6 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) + BUG(); + evtchn = bind_virq.port; +- irq = find_unbound_irq(); +- +- set_irq_chip_and_handler_name(irq, &xen_percpu_chip, +- handle_percpu_irq, "virq"); +- evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_virq_info(evtchn, virq); -@@ -504,6 +907,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, + +@@ -505,6 +907,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, } EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); @@ -17507,7 +18151,7 @@ index 30e0467..6b6f563 100644 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) -@@ -617,17 +1043,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count); +@@ -618,17 +1043,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count); * a bitset of words which contain pending event bits. The second * level is a bitset of pending events themselves. */ @@ -17526,7 +18170,7 @@ index 30e0467..6b6f563 100644 do { unsigned long pending_words; -@@ -650,9 +1072,16 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) +@@ -651,9 +1072,16 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int bit_idx = __ffs(pending_bits); int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; @@ -17545,7 +18189,7 @@ index 30e0467..6b6f563 100644 } } -@@ -660,14 +1089,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) +@@ -661,14 +1089,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) count = __get_cpu_var(xed_nesting_count); __get_cpu_var(xed_nesting_count) = 0; @@ -17580,7 +18224,7 @@ index 30e0467..6b6f563 100644 /* Rebind a new event channel to an existing irq. */ void rebind_evtchn_irq(int evtchn, int irq) -@@ -704,7 +1151,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +@@ -705,7 +1151,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); @@ -17592,7 +18236,7 @@ index 30e0467..6b6f563 100644 return -1; /* Send future instances of this interrupt to other vcpu. */ -@@ -745,33 +1195,17 @@ int resend_irq_on_evtchn(unsigned int irq) +@@ -746,33 +1195,17 @@ int resend_irq_on_evtchn(unsigned int irq) return 1; } @@ -17629,7 +18273,7 @@ index 30e0467..6b6f563 100644 { int evtchn = evtchn_from_irq(irq); struct shared_info *sh = HYPERVISOR_shared_info; -@@ -856,7 +1290,7 @@ void xen_clear_irq_pending(int irq) +@@ -857,7 +1290,7 @@ void xen_clear_irq_pending(int irq) if (VALID_EVTCHN(evtchn)) clear_evtchn(evtchn); } @@ -17638,7 +18282,7 @@ index 30e0467..6b6f563 100644 void xen_set_irq_pending(int irq) { int evtchn = evtchn_from_irq(irq); -@@ -876,9 +1310,9 @@ bool xen_test_irq_pending(int irq) +@@ -877,9 +1310,9 @@ bool xen_test_irq_pending(int irq) return ret; } @@ -17650,7 +18294,7 @@ index 30e0467..6b6f563 100644 { evtchn_port_t evtchn = evtchn_from_irq(irq); -@@ -886,13 +1320,33 @@ void xen_poll_irq(int irq) +@@ -887,13 +1320,33 @@ void xen_poll_irq(int irq) struct sched_poll poll; poll.nr_ports = 1; @@ -17685,7 +18329,7 @@ index 30e0467..6b6f563 100644 void xen_irq_resume(void) { -@@ -915,27 +1369,117 @@ void xen_irq_resume(void) +@@ -916,37 +1369,117 @@ void xen_irq_resume(void) restore_cpu_virqs(cpu); restore_cpu_ipis(cpu); } @@ -17694,7 +18338,7 @@ index 30e0467..6b6f563 100644 + struct physdev_pirq_eoi_gmfn eoi_gmfn; + + eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits); -+ if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0) { ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) != 0) { + /* Could recover by reverting to old method...? */ + BUG(); + } @@ -17710,22 +18354,28 @@ index 30e0467..6b6f563 100644 + .disable = mask_irq, + .mask = mask_irq, + .unmask = unmask_irq, -+ + +- .ack = ack_dynirq, + .eoi = ack_dynirq, -+ .set_affinity = set_affinity_irq, + .set_affinity = set_affinity_irq, +- .retrigger = retrigger_dynirq, + .retrigger = retrigger_irq, -+}; -+ + }; + +-static struct irq_chip en_percpu_chip __read_mostly = { +static struct irq_chip xen_percpu_chip __read_mostly = { -+ .name = "xen-percpu", -+ + .name = "xen-percpu", + +- .disable = disable_dynirq, +- .mask = disable_dynirq, +- .unmask = enable_dynirq, + .disable = mask_irq, + .mask = mask_irq, + .unmask = unmask_irq, .ack = ack_dynirq, -+}; -+ + }; + +static struct irq_chip xen_pirq_chip __read_mostly = { + .name = "xen-pirq", + @@ -17741,12 +18391,11 @@ index 30e0467..6b6f563 100644 + .eoi = ack_pirq, + .end = end_pirq, + - .set_affinity = set_affinity_irq, -- .retrigger = retrigger_dynirq, ++ .set_affinity = set_affinity_irq, + + .retrigger = retrigger_irq, - }; - ++}; ++ +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; @@ -17808,7 +18457,7 @@ index 30e0467..6b6f563 100644 init_evtchn_cpu_bindings(); -@@ -943,5 +1487,11 @@ void __init xen_init_IRQ(void) +@@ -954,5 +1487,11 @@ void __init xen_init_IRQ(void) for (i = 0; i < NR_EVENT_CHANNELS; i++) mask_evtchn(i); @@ -27783,10 +28432,10 @@ index 0000000..f0d5426 +} diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c new file mode 100644 -index 0000000..6450c17 +index 0000000..6d1a770 --- /dev/null +++ b/drivers/xen/pcpu.c -@@ -0,0 +1,420 @@ +@@ -0,0 +1,452 @@ +/* + * pcpu.c - management physical cpu in dom0 environment + */ @@ -28102,6 +28751,38 @@ index 0000000..6450c17 + return pcpu; +} + ++int xen_pcpu_index(uint32_t id, int is_acpiid) ++{ ++ int cpu_num = 0, max_id = 0, ret; ++ xen_platform_op_t op = { ++ .cmd = XENPF_get_cpuinfo, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ }; ++ struct xenpf_pcpuinfo *info = &op.u.pcpu_info; ++ ++ info->xen_cpuid = 0; ++ ret = HYPERVISOR_dom0_op(&op); ++ if (ret) ++ return -1; ++ max_id = op.u.pcpu_info.max_present; ++ ++ while ((cpu_num <= max_id)) { ++ info->xen_cpuid = cpu_num; ++ ret = HYPERVISOR_dom0_op(&op); ++ if (ret) ++ continue; ++ ++ if (op.u.pcpu_info.max_present > max_id) ++ max_id = op.u.pcpu_info.max_present; ++ if (id == (is_acpiid ? info->acpi_id : info->apic_id)) ++ return cpu_num; ++ cpu_num++; ++ } ++ ++ return -1; ++} ++EXPORT_SYMBOL(xen_pcpu_index); ++ +/* + * Sync dom0's pcpu information with xen hypervisor's + */ @@ -28206,7 +28887,7 @@ index 0000000..6450c17 + return err; +} + -+subsys_initcall(xen_pcpu_init); ++arch_initcall(xen_pcpu_init); diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c new file mode 100644 index 0000000..c01b5dd @@ -31168,10 +31849,10 @@ index 176c518..d681cc9 100644 +}; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h -index 24c3956..e8cf80f 100644 +index a8d25e4..1bc4927 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -105,6 +105,12 @@ extern unsigned int kobjsize(const void *objp); +@@ -109,6 +109,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ @@ -31184,7 +31865,7 @@ index 24c3956..e8cf80f 100644 #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS -@@ -195,6 +201,11 @@ struct vm_operations_struct { +@@ -199,6 +205,11 @@ struct vm_operations_struct { */ int (*access)(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); @@ -33121,6 +33802,57 @@ index 0000000..17ae622 +DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ +diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h +index 5fec575..dd55dac 100644 +--- a/include/xen/interface/sched.h ++++ b/include/xen/interface/sched.h +@@ -65,6 +65,39 @@ struct sched_poll { + DEFINE_GUEST_HANDLE_STRUCT(sched_poll); + + /* ++ * Declare a shutdown for another domain. The main use of this function is ++ * in interpreting shutdown requests and reasons for fully-virtualized ++ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. ++ * @arg == pointer to sched_remote_shutdown structure. ++ */ ++#define SCHEDOP_remote_shutdown 4 ++struct sched_remote_shutdown { ++ domid_t domain_id; /* Remote domain ID */ ++ unsigned int reason; /* SHUTDOWN_xxx reason */ ++}; ++ ++/* ++ * Latch a shutdown code, so that when the domain later shuts down it ++ * reports this code to the control tools. ++ * @arg == as for SCHEDOP_shutdown. ++ */ ++#define SCHEDOP_shutdown_code 5 ++ ++/* ++ * Setup, poke and destroy a domain watchdog timer. ++ * @arg == pointer to sched_watchdog structure. ++ * With id == 0, setup a domain watchdog timer to cause domain shutdown ++ * after timeout, returns watchdog id. ++ * With id != 0 and timeout == 0, destroy domain watchdog timer. ++ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. ++ */ ++#define SCHEDOP_watchdog 6 ++struct sched_watchdog { ++ uint32_t id; /* watchdog ID */ ++ uint32_t timeout; /* timeout */ ++}; ++ ++/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. +@@ -73,5 +106,6 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_poll); + #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ + #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ + #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ ++#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ + + #endif /* __XEN_PUBLIC_SCHED_H__ */ diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h new file mode 100644 index 0000000..f31fdab @@ -33651,10 +34383,10 @@ index eaf85fa..0be36b9 100644 +#endif /* _XEN_PAGE_H */ diff --git a/include/xen/pcpu.h b/include/xen/pcpu.h new file mode 100644 -index 0000000..fb2bf6b +index 0000000..7e8f9d1 --- /dev/null +++ b/include/xen/pcpu.h -@@ -0,0 +1,30 @@ +@@ -0,0 +1,32 @@ +#ifndef _XEN_PCPU_H +#define _XEN_PCPU_H + @@ -33684,6 +34416,8 @@ index 0000000..fb2bf6b +extern int register_xen_pcpu_notifier(struct notifier_block *nb); + +extern void unregister_xen_pcpu_notifier(struct notifier_block *nb); ++ ++extern int xen_pcpu_index(uint32_t acpi_id, int is_acpiid); +#endif diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h new file mode 100644 @@ -35719,7 +36453,7 @@ index 555d5d2..d1dc23c 100644 { int aligned; diff --git a/mm/memory.c b/mm/memory.c -index 194dc17..5b0d7f1 100644 +index 53c1da0..c8741df 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -553,6 +553,13 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@ -35823,10 +36557,10 @@ index 194dc17..5b0d7f1 100644 } EXPORT_SYMBOL_GPL(apply_to_page_range); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 36992b6..bc1b6e9 100644 +index 902e5fc..101715c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -593,6 +593,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) +@@ -594,6 +594,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) if (bad) return; @@ -35840,7 +36574,7 @@ index 36992b6..bc1b6e9 100644 if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order); debug_check_no_obj_freed(page_address(page), -@@ -1087,6 +1094,13 @@ static void free_hot_cold_page(struct page *page, int cold) +@@ -1088,6 +1095,13 @@ static void free_hot_cold_page(struct page *page, int cold) kmemcheck_free_shadow(page, 0); |