diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5f6aa11..9ec8558 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -113,6 +113,7 @@ parameter is applicable: More X86-64 boot options can be found in Documentation/x86/x86_64/boot-options.txt . X86 Either 32bit or 64bit x86 (same as X86-32+X86-64) + XEN Xen support is enabled In addition, the following text indicates that the option: @@ -2760,6 +2761,18 @@ and is between 256 and 4096 characters. It is defined in the file xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. xd_geo= See header of drivers/block/xd.c. + xen_emul_unplug= [HW,X86,XEN] + Unplug Xen emulated devices + Format: [unplug0,][unplug1] + ide-disks -- unplug primary master IDE devices + aux-ide-disks -- unplug non-primary-master IDE devices + nics -- unplug network devices + all -- unplug all emulated devices (NICs and IDE disks) + unnecessary -- unplugging emulated devices is + unnecessary even if the host did not respond to + the unplug protocol + never -- do not unplug even if version check succeeds + xirc2ps_cs= [NET,PCMCIA] Format: <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 29a6ff8..81f9b94 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -267,10 +267,14 @@ IOMMU (input/output memory management unit) iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU implementation: - swiotlb=<pages>[,force] + swiotlb=[npages=<pages>] + swiotlb=[force] + swiotlb=[overflow=<size>] + <pages> Prereserve that many 128K pages for the software IO bounce buffering. force Force all IO through the software TLB. + <size> Size in bytes of the overflow buffer.
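The xen_emul_unplug= documentation above implies a small comma-separated option parser on the kernel side. The following is a minimal sketch only, assuming a hypothetical parse_xen_emul_unplug() handler and illustrative XEN_UNPLUG_* flag names; the patch's actual parser may differ:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Illustrative flag word and bit names -- not necessarily the
 * identifiers used by the real unplug code. */
static unsigned int xen_emul_unplug;

#define XEN_UNPLUG_ALL_IDE_DISKS	(1 << 0)
#define XEN_UNPLUG_AUX_IDE_DISKS	(1 << 1)
#define XEN_UNPLUG_ALL_NICS		(1 << 2)
#define XEN_UNPLUG_UNNECESSARY		(1 << 3)
#define XEN_UNPLUG_NEVER		(1 << 4)

static int __init parse_xen_emul_unplug(char *arg)
{
	char *p, *q;

	/* Walk the comma-separated list, OR-ing a flag per option. */
	for (p = arg; p; p = q) {
		q = strchr(p, ',');
		if (q)
			*q++ = '\0';

		if (!strcmp(p, "ide-disks"))
			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
		else if (!strcmp(p, "aux-ide-disks"))
			xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
		else if (!strcmp(p, "nics"))
			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
		else if (!strcmp(p, "all"))
			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS |
					   XEN_UNPLUG_AUX_IDE_DISKS |
					   XEN_UNPLUG_ALL_NICS;
		else if (!strcmp(p, "unnecessary"))
			xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY;
		else if (!strcmp(p, "never"))
			xen_emul_unplug |= XEN_UNPLUG_NEVER;
		else
			printk(KERN_WARNING
			       "unrecognised xen_emul_unplug option '%s'\n", p);
	}
	return 0;
}
early_param("xen_emul_unplug", parse_xen_emul_unplug);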
Settings for the IBM Calgary hardware IOMMU currently found in IBM pSeries and xSeries machines: diff --git a/Makefile b/Makefile index 6a29b82..83813cc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = .23 +EXTRAVERSION = .24 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 8d3c79c..7d09a09 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -73,7 +73,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return 0; - return addr + size <= *dev->dma_mask; + return addr + size - 1 <= *dev->dma_mask; } static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index dcbaea7..f0acde6 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -4,8 +4,6 @@ #include #include -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern void pci_swiotlb_init(void); diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h index b8370c8..baa74c8 100644 --- a/arch/ia64/include/asm/xen/events.h +++ b/arch/ia64/include/asm/xen/events.h @@ -36,10 +36,6 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return !(ia64_psr(regs)->i); } -static inline void handle_irq(int irq, struct pt_regs *regs) -{ - __do_IRQ(irq); -} #define irq_ctx_init(cpu) do { } while (0) #endif /* _ASM_IA64_XEN_EVENTS_H */ diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8..53292ab 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; - swiotlb_init(); + swiotlb_init(1); } void __init pci_swiotlb_init(void) @@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index e281dae..80a973b 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -197,7 +197,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return 0; - return addr + size <= *dev->dma_mask; + return addr + size - 1 <= *dev->dma_mask; } static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d..b152de3 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04f638d..df2c9e9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb5a57c..a3b7475 100644 --- 
a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1885,6 +1885,10 @@ config PCI_OLPC def_bool y depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) +config PCI_XEN + bool + select SWIOTLB + config PCI_DOMAINS def_bool y depends on PCI diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 18aa3f8..4413ba4 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,20 +23,16 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_shutdown(void); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_init_api(void); #else -static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } -static inline void amd_iommu_shutdown(void) { } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb..0918654 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,10 +62,8 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern int calgary_iommu_init(void); extern void detect_calgary(void); #else -static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d..ac91eed 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -20,7 +20,8 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -extern dma_addr_t bad_dma_address; +#define DMA_ERROR_CODE 0 + extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); + return (dma_addr == DMA_ERROR_CODE); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -66,7 +67,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return 0; - return addr + size <= *dev->dma_mask; + return addr + size - 1 <= *dev->dma_mask; } static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 40b4e61..fa3fd43 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -109,6 +109,8 @@ extern void reserve_early(u64 start, u64 end, char *name); extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); extern void free_early(u64 start, u64 end); extern void early_res_to_bootmem(u64 start, u64 end); +extern u64 early_res_next_free(u64 start); +extern u64 early_res_next_reserved(u64 addr, u64 max); extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); extern unsigned long e820_end_of_ram_pfn(void); diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa..4ac5b0f 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern void 
early_gart_iommu_check(void); -extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); +extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } -static inline void gart_iommu_init(void) -{ -} -static inline void gart_iommu_shutdown(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 3251e23..fa152cb 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -68,6 +68,7 @@ extern unsigned long force_hpet_address; extern int hpet_force_user; extern u8 hpet_msi_disable; extern int is_hpet_enabled(void); +extern int disable_hpet(char *); extern int hpet_enable(void); extern void hpet_disable(void); extern unsigned long hpet_readl(unsigned long a); @@ -108,6 +109,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); #else /* CONFIG_HPET_TIMER */ static inline int hpet_enable(void) { return 0; } +static inline int disable_hpet(char *s) { return 0; } static inline int is_hpet_enabled(void) { return 0; } #define hpet_readl(a) 0 diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 439a9ac..bf88684 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -36,16 +36,28 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, free_pgd_range(tlb, addr, end, floor, ceiling); } +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - set_pte_at(mm, addr, ptep, pte); +#if PAGETABLE_LEVELS >= 3 + set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte))); +#else + set_pgd((pgd_t *)ptep, native_make_pgd(native_pte_val(pte))); +#endif } static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return ptep_get_and_clear(mm, addr, ptep); + pte_t pte = huge_ptep_get(ptep); + + set_huge_pte_at(mm, addr, ptep, __pte(0)); + return pte; } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, @@ -66,19 +78,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep_set_wrprotect(mm, addr, ptep); + pte_t pte = huge_ptep_get(ptep); + + pte = pte_wrprotect(pte); + set_huge_pte_at(mm, addr, ptep, pte); } static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} + pte_t oldpte = huge_ptep_get(ptep); + int changed = !pte_same(oldpte, pte); -static inline pte_t huge_ptep_get(pte_t *ptep) -{ - return *ptep; + if (changed && dirty) { + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + flush_tlb_page(vma, addr); + } + + return changed; } static inline int arch_prepare_hugepage(struct page *page) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7373932..49ee1a9 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -7,6 +7,10 @@ #include #include +#include + +extern int isapnp_disable; + #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ @@ -199,6 +203,17 @@ extern 
void __iomem *early_memremap(resource_size_t phys_addr, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); +#ifdef CONFIG_XEN +struct bio_vec; + +extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); + +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#endif /* CONFIG_XEN */ + #define IO_SPACE_LIMIT 0xffff #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 5f61f6e..b852da9 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -172,6 +172,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void probe_nr_irqs_gsi(void); +extern int get_nr_irqs_gsi(void); extern int setup_ioapic_entry(int apic, int irq, struct IO_APIC_route_entry *entry, @@ -201,4 +202,6 @@ static inline void probe_nr_irqs_gsi(void) { } #endif +void xen_io_apic_init(void); + #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21b..345c99c 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern void pci_iommu_shutdown(void); -extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6e90a04..ba4dc7b 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -120,6 +120,12 @@ */ #define MCE_SELF_VECTOR 0xeb +#ifdef CONFIG_XEN +/* Xen vector callback to receive events in a HVM domain */ +#define XEN_HVM_EVTCHN_CALLBACK 0xe9 +#endif + + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority @@ -157,6 +163,14 @@ static inline int invalid_vm86_irq(int irq) #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) +#ifndef __ASSEMBLY__ +# if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SPARSE_IRQ) +extern int nr_dynamic_irqs; +# else +# define NR_DYNAMIC_IRQS 256 +# endif +#endif + #ifdef CONFIG_X86_IO_APIC # ifdef CONFIG_SPARSE_IRQ # define NR_IRQS \ @@ -165,13 +179,13 @@ static inline int invalid_vm86_irq(int irq) (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # else # if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) +# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) + NR_DYNAMIC_IRQS # else -# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) + NR_DYNAMIC_IRQS # endif # endif #else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS NR_IRQS_LEGACY +# define NR_IRQS NR_IRQS_LEGACY + NR_DYNAMIC_IRQS #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b50..e15fca1 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -55,4 +55,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void) } #endif +#ifdef CONFIG_MICROCODE_XEN +extern struct microcode_ops * __init init_xen_microcode(void); +#else +static inline struct microcode_ops * __init init_xen_microcode(void) +{ + return NULL; +} +#endif + 
#endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 80a1dee..67eaa91 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -13,6 +13,9 @@ typedef struct { int size; struct mutex lock; void *vdso; +#ifdef CONFIG_XEN + int has_foreign_mappings; +#endif } mm_context_t; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index efb3899..e571db4 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); } + static inline void set_iopl_mask(unsigned mask) { PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); } +static inline void set_io_bitmap(struct thread_struct *thread, + unsigned long bytes_updated) +{ + PVOP_VCALL2(pv_cpu_ops.set_io_bitmap, thread, bytes_updated); +} + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { @@ -770,15 +777,28 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) #define PV_RESTORE_REGS "popl %edx; popl %ecx;" /* save and restore all caller-save registers, except return value */ -#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" -#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" +#define __PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define __PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" + +#ifdef CONFIG_FRAME_POINTER +#define PV_SAVE_ALL_CALLER_REGS \ + "push %ebp;" \ + "mov %esp, %ebp;" \ + __PV_SAVE_ALL_CALLER_REGS +#define PV_RESTORE_ALL_CALLER_REGS \ + __PV_RESTORE_ALL_CALLER_REGS \ + "leave;" +#else +#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS +#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS +#endif #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS #define PV_VEXTRA_CLOBBERS #else /* save and restore all caller-save registers, except return value */ -#define PV_SAVE_ALL_CALLER_REGS \ +#define __PV_SAVE_ALL_CALLER_REGS \ "push %rcx;" \ "push %rdx;" \ "push %rsi;" \ @@ -787,7 +807,7 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) "push %r9;" \ "push %r10;" \ "push %r11;" -#define PV_RESTORE_ALL_CALLER_REGS \ +#define __PV_RESTORE_ALL_CALLER_REGS \ "pop %r11;" \ "pop %r10;" \ "pop %r9;" \ @@ -797,6 +817,19 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) "pop %rdx;" \ "pop %rcx;" +#ifdef CONFIG_FRAME_POINTER +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rbp;" \ + "mov %rsp, %rbp;" \ + __PV_SAVE_ALL_CALLER_REGS +#define PV_RESTORE_ALL_CALLER_REGS \ + __PV_RESTORE_ALL_CALLER_REGS \ + "leaveq;" +#else +#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS +#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS +#endif + /* We save some registers, but all of them, that's too much. 
We clobber all * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 9357473..3202dcc 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -135,6 +135,8 @@ struct pv_cpu_ops { void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); + void (*set_io_bitmap)(struct thread_struct *thread, + unsigned long bytes_updated); void (*wbinvd)(void); void (*io_delay)(void); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ada8c20..faa0af1 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -21,6 +21,7 @@ struct pci_sysdata { extern int pci_routeirq; extern int noioapicquirk; extern int noioapicreroute; +extern int pci_scan_all_fns; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, @@ -49,6 +50,11 @@ extern unsigned int pcibios_assign_all_busses(void); #define pcibios_assign_all_busses() 0 #endif +static inline int pcibios_scan_all_fns(struct pci_bus *bus, int devfn) +{ + return pci_scan_all_fns; +} + extern unsigned long pci_mem_start; #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM (pci_mem_start) @@ -87,6 +93,7 @@ extern void pci_iommu_alloc(void); /* MSI arch hook */ #define arch_setup_msi_irqs arch_setup_msi_irqs +#define arch_teardown_msi_irqs arch_teardown_msi_irqs #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) @@ -128,6 +135,7 @@ extern void pci_iommu_alloc(void); #include /* generic pci stuff */ +#define HAVE_ARCH_PCIBIOS_SCAN_ALL_FNS #include #define PCIBIOS_MAX_MEM_32 0xffffffff diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988..30cbf49 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -45,6 +45,7 @@ enum pci_bf_sort_state { extern unsigned int pcibios_max_latency; void pcibios_resource_survey(void); +void pcibios_set_cache_line_size(void); /* pci-pc.c */ @@ -106,6 +107,7 @@ extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); extern int pci_olpc_init(void); +extern int pci_xen_init(void); extern void __init dmi_check_pciprobe(void); extern void __init dmi_check_skip_isa_align(void); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af6fd36..430e3cc 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,7 +15,6 @@ : (prot)) #ifndef __ASSEMBLY__ - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -26,6 +25,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern spinlock_t pgd_lock; extern struct list_head pgd_list; +extern struct mm_struct *pgd_page_get_mm(struct page *page); + #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ @@ -76,6 +77,11 @@ extern struct list_head pgd_list; #endif /* CONFIG_PARAVIRT */ +static inline pteval_t pte_flags(pte_t pte) +{ + return pte_val(pte) & PTE_FLAGS_MASK; +} + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. 
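The pte_flags() definition added above (and removed from pgtable_types.h further down) routes flag queries through pte_val(), so a paravirt guest's PTE translation is applied before the flag bits are masked. A minimal illustration of the intended use, with example_pte_present() as a hypothetical caller mirroring the shape of the kernel's pte_present():

#include <asm/pgtable.h>

/* Hypothetical helper, for illustration only: because pte_flags()
 * is now built on pte_val() rather than native_pte_val(), any
 * paravirt (e.g. Xen) transformation of the raw PTE value happens
 * before the flags are tested. */
static inline int example_pte_present(pte_t pte)
{
	return pte_flags(pte) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}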
@@ -397,6 +403,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) +#define arch_vm_get_page_prot arch_vm_get_page_prot +extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags); + #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { @@ -616,6 +625,9 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) memcpy(dst, src, count * sizeof(pgd_t)); } +int create_lookup_pte_addr(struct mm_struct *mm, + unsigned long address, + uint64_t *ptep); #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c57a301..4e46931 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -160,7 +160,7 @@ extern void cleanup_highmap(void); #define pgtable_cache_init() do { } while (0) #define check_pgt_cache() do { } while (0) -#define PAGE_AGP PAGE_KERNEL_NOCACHE +#define PAGE_AGP PAGE_KERNEL_IO_NOCACHE #define HAVE_PAGE_AGP 1 /* fs/proc/kcore.c */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d1f4a76..a81b0ed 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -265,11 +265,6 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } -static inline pteval_t pte_flags(pte_t pte) -{ - return native_pte_val(pte) & PTE_FLAGS_MASK; -} - #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 13b1885..0aac25a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -551,6 +551,9 @@ static inline void native_set_iopl_mask(unsigned mask) #endif } +extern void native_set_io_bitmap(struct thread_struct *thread, + unsigned long updated_bytes); + static inline void native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -592,6 +595,7 @@ static inline void load_sp0(struct tss_struct *tss, } #define set_iopl_mask native_set_iopl_mask +#define set_io_bitmap native_set_io_bitmap #endif /* CONFIG_PARAVIRT */ /* diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 18e496c..154a5f1 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -95,6 +95,11 @@ void *extend_brk(size_t size, size_t align); : : "i" (sz)); \ } +/* Helper for reserving space for arrays of things */ +#define RESERVE_BRK_ARRAY(type, name, entries) \ + type *name; \ + RESERVE_BRK(name, sizeof(type) * entries) + #ifdef __i386__ void __init i386_start_kernel(void); diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20..8085277 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,15 +3,16 @@ #include -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern void pci_swiotlb_init(void); +extern int __init pci_swiotlb_detect(void); +extern void __init pci_swiotlb_init(void); #else #define swiotlb 0 +static inline int pci_swiotlb_detect(void) +{ + return 0; +} static inline void pci_swiotlb_init(void) { } diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 1bb6e39..ef0fa4d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -33,11 +33,11 @@ long sys_rt_sigreturn(struct pt_regs *); asmlinkage int sys_set_thread_area(struct user_desc __user 
*); asmlinkage int sys_get_thread_area(struct user_desc __user *); -/* X86_32 only */ -#ifdef CONFIG_X86_32 /* kernel/ioport.c */ -long sys_iopl(struct pt_regs *); +asmlinkage long sys_iopl(unsigned int); +/* X86_32 only */ +#ifdef CONFIG_X86_32 /* kernel/process_32.c */ int sys_clone(struct pt_regs *); int sys_execve(struct pt_regs *); @@ -68,8 +68,6 @@ int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ /* X86_64 only */ -/* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process_64.c */ asmlinkage long sys_clone(unsigned long, unsigned long, diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 7f3eba0..e4fc8ea 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -89,6 +89,10 @@ static inline void __flush_tlb_one(unsigned long addr) #ifndef CONFIG_SMP +static inline void __init init_smp_flush(void) +{ +} + #define flush_tlb() __flush_tlb() #define flush_tlb_all() __flush_tlb_all() #define local_flush_tlb() __flush_tlb() @@ -129,6 +133,8 @@ static inline void reset_lazy_tlbstate(void) #define local_flush_tlb() __flush_tlb() +extern void init_smp_flush(void); + extern void flush_tlb_all(void); extern void flush_tlb_current_task(void); extern void flush_tlb_mm(struct mm_struct *); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd..d8e7145 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -91,6 +91,14 @@ struct x86_init_timers { }; /** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -101,6 +109,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; + struct x86_init_iommu iommu; }; /** @@ -121,6 +130,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + void (*iommu_shutdown)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9c371e4..41c4be0 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -45,6 +45,8 @@ #include #include #include +#include +#include /* * The hypercall asms have to meet several constraints: @@ -200,6 +202,23 @@ extern struct { char _entry[32]; } hypercall_page[]; (type)__res; \ }) +static inline long +privcmd_call(unsigned call, + unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5) +{ + __HYPERCALL_DECLS; + __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + + asm volatile("call *%[call]" + : __HYPERCALL_5PARAM + : [call] "a" (&hypercall_page[call]) + : __HYPERCALL_CLOBBER5); + + return (long)__res; +} + static inline int HYPERVISOR_set_trap_table(struct trap_info *table) { @@ -282,6 +301,20 @@ HYPERVISOR_set_timer_op(u64 timeout) } static inline int +HYPERVISOR_mca(struct xen_mc *mc_op) +{ + mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; + return _hypercall1(int, mca, mc_op); +} + +static inline int +HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, platform_op); +} + +static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) { return _hypercall2(int, 
set_debugreg, reg, value); @@ -417,6 +450,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) return _hypercall2(int, nmi_op, op, arg); } +static inline unsigned long __must_check +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { @@ -424,6 +463,14 @@ MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) mcl->args[0] = set; } +#if defined(CONFIG_X86_64) +#define MULTI_UVMFLAGS_INDEX 2 +#define MULTI_UVMDOMID_INDEX 3 +#else +#define MULTI_UVMFLAGS_INDEX 3 +#define MULTI_UVMDOMID_INDEX 4 +#endif + static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, pte_t new_val, unsigned long flags) @@ -432,12 +479,11 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, mcl->args[0] = va; if (sizeof(new_val) == sizeof(long)) { mcl->args[1] = new_val.pte; - mcl->args[2] = flags; } else { mcl->args[1] = new_val.pte; mcl->args[2] = new_val.pte >> 32; - mcl->args[3] = flags; } + mcl->args[MULTI_UVMFLAGS_INDEX] = flags; } static inline void diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d5b7e90..396ff4c 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,31 +37,4 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -enum xen_domain_type { - XEN_NATIVE, /* running on bare hardware */ - XEN_PV_DOMAIN, /* running in a PV domain */ - XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ -}; - -#ifdef CONFIG_XEN -extern enum xen_domain_type xen_domain_type; -#else -#define xen_domain_type XEN_NATIVE -#endif - -#define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) - -#ifdef CONFIG_XEN_DOM0 -#include - -#define xen_initial_domain() (xen_pv_domain() && \ - xen_start_info->flags & SIF_INITDOMAIN) -#else /* !CONFIG_XEN_DOM0 */ -#define xen_initial_domain() (0) -#endif /* CONFIG_XEN_DOM0 */ - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1..9539998 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 @@ -97,6 +97,8 @@ DEFINE_GUEST_HANDLE(void); #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) #ifndef __ASSEMBLY__ +#include + struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e00..8413688 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... 
*/ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d266..839a481 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h new file mode 100644 index 0000000..75df312 --- /dev/null +++ b/arch/x86/include/asm/xen/iommu.h @@ -0,0 +1,13 @@ +#ifndef ASM_X86__XEN_IOMMU_H +#define ASM_X86__XEN_IOMMU_H + +#ifdef CONFIG_PCI_XEN +extern void xen_iommu_init(void); +#else +static inline void xen_iommu_init(void) +{ +} +#endif + +#endif + diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 018a0a4..8760cc6 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -35,16 +36,25 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); -extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); static inline unsigned long pfn_to_mfn(unsigned long pfn) { + unsigned long mfn; + if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; + mfn = get_phys_to_machine(pfn); + + if (mfn != INVALID_P2M_ENTRY) + mfn &= ~FOREIGN_FRAME_BIT; + + return mfn; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) @@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* @@ -112,13 +120,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) */ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn = mfn_to_pfn(mfn); - if ((pfn < max_mapnr) - && !xen_feature(XENFEAT_auto_translated_physmap) - && (get_phys_to_machine(pfn) != mfn)) - return max_mapnr; /* force !pfn_valid() */ - /* XXX fixme; not true with sparsemem */ + if (get_phys_to_machine(pfn) != mfn) + return -1; /* force !pfn_valid() */ return pfn; } @@ -163,6 +167,7 @@ static inline pte_t
__pte_ma(pteval_t x) #define pgd_val_ma(x) ((x).pgd) +void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid); xmaddr_t arbitrary_virt_to_machine(void *address); unsigned long arbitrary_virt_to_mfn(void *vaddr); diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h new file mode 100644 index 0000000..6683196 --- /dev/null +++ b/arch/x86/include/asm/xen/pci.h @@ -0,0 +1,104 @@ +#ifndef _ASM_X86_XEN_PCI_H +#define _ASM_X86_XEN_PCI_H + +#if defined(CONFIG_PCI_MSI) +#if defined(CONFIG_PCI_XEN) +int xen_register_pirq(u32 gsi, int triggering); +int xen_register_gsi(u32 gsi, int triggering, int polarity); +int xen_create_msi_irq(struct pci_dev *dev, + struct msi_desc *msidesc, + int type); +void xen_pci_teardown_msi_dev(struct pci_dev *dev); +void xen_pci_teardown_msi_irq(int irq); +int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); + +/* drivers/pci/xen-pcifront.c points this structure at + * its own functions. + */ +struct xen_pci_frontend_ops { + int (*enable_msi)(struct pci_dev *dev, int **vectors); + void (*disable_msi)(struct pci_dev *dev); + int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + void (*disable_msix)(struct pci_dev *dev); +}; + +extern struct xen_pci_frontend_ops *xen_pci_frontend; + +static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, + int **vectors) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msi) + return xen_pci_frontend->enable_msi(dev, vectors); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msi) + xen_pci_frontend->disable_msi(dev); +} +static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, + int **vectors, int nvec) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msix) + return xen_pci_frontend->enable_msix(dev, vectors, nvec); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msix) + xen_pci_frontend->disable_msix(dev); +} +#else +static inline int xen_create_msi_irq(struct pci_dev *dev, + struct msi_desc *msidesc, + int type) +{ + return -1; +} +static inline void xen_pci_teardown_msi_dev(struct pci_dev *dev) { } +static inline void xen_pci_teardown_msi_irq(int irq) { } +static inline int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + return -ENODEV; +} +#endif /* CONFIG_PCI_XEN */ + +#endif /* CONFIG_PCI_MSI */ + +#ifdef CONFIG_XEN_DOM0_PCI +int xen_register_gsi(u32 gsi, int triggering, int polarity); +int xen_find_device_domain_owner(struct pci_dev *dev); +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); +int xen_unregister_device_domain_owner(struct pci_dev *dev); + +#else +static inline int xen_register_gsi(u32 gsi, int triggering, int polarity) +{ + return -1; +} + +static inline int xen_find_device_domain_owner(struct pci_dev *dev) +{ + return -1; +} +static inline int xen_register_device_domain_owner(struct pci_dev *dev, + uint16_t domain) +{ + return -1; +} +static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) +{ + return -1; +} +#endif + +#if defined(CONFIG_PCI_MSI) && defined(CONFIG_XEN_DOM0_PCI) +int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); +#else +static inline int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + return -1; +} +#endif + +#endif /* _ASM_X86_XEN_PCI_H */ diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h
b/arch/x86/include/asm/xen/swiotlb-xen.h new file mode 100644 index 0000000..e4fe299 --- /dev/null +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_SWIOTLB_XEN_H +#define _ASM_X86_SWIOTLB_XEN_H + +#ifdef CONFIG_PCI_XEN +extern int xen_swiotlb; +extern int __init pci_xen_swiotlb_detect(void); +extern void __init pci_xen_swiotlb_init(void); +#else +#define xen_swiotlb 0 +static inline int __init pci_xen_swiotlb_detect(void) { return 0; } +static inline void __init pci_xen_swiotlb_init(void) { } +#endif + +#endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0c..d4f3b05 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -11,6 +11,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_pvclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif @@ -111,6 +112,7 @@ obj-$(CONFIG_X86_MRST) += mrst.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o +microcode-$(CONFIG_MICROCODE_XEN) += microcode_xen.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 23c2da8..a2a5125 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -42,6 +42,10 @@ #include #include +#include + +#include + static int __initdata acpi_force = 0; u32 acpi_rsdt_forced; int acpi_disabled; @@ -149,6 +153,10 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) { unsigned int ver = 0; + /* We don't want to register lapics when in Xen dom0 */ + if (xen_initial_domain()) + return; + if (!enabled) { ++disabled_cpus; return; @@ -461,9 +469,13 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) */ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { - unsigned int irq; + int irq; unsigned int plat_gsi = gsi; + irq = xen_register_gsi(gsi, trigger, polarity); + if (irq >= 0) + return irq; + #ifdef CONFIG_PCI /* * Make sure all (legacy) PCI IRQs are set as level-triggered. @@ -740,6 +752,10 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) static void __init acpi_register_lapic_address(unsigned long address) { + /* Xen dom0 doesn't have usable lapics */ + if (xen_initial_domain()) + return; + mp_lapic_addr = address; set_fixmap_nocache(FIX_APIC_BASE, address); @@ -860,6 +876,9 @@ int __init acpi_probe_gsi(void) max_gsi = gsi; } + if (xen_initial_domain()) + max_gsi += 255; /* Plus maximum entries of an ioapic. 
*/ + return max_gsi + 1; } diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d85d1b2..8aabedd 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -12,6 +12,8 @@ #include #include +#include + static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) { struct acpi_object_list *obj_list; @@ -59,7 +61,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) /* * If mwait/monitor is unsupported, C2/C3_FFH will be disabled */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) + if (!cpu_has(c, X86_FEATURE_MWAIT) && !xen_initial_domain()) buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); obj->type = ACPI_TYPE_BUFFER; @@ -88,6 +90,19 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) EXPORT_SYMBOL(arch_acpi_processor_init_pdc); +/* Initialize _PDC data based on the CPU vendor */ +void xen_arch_acpi_processor_init_pdc(struct acpi_processor *pr) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) + init_intel_pdc(pr, c); + + return; +} +EXPORT_SYMBOL(xen_arch_acpi_processor_init_pdc); + void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638..9eff23c 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -12,6 +12,8 @@ #include #include +#include + #include "realmode/wakeup.h" #include "sleep.h" diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f0fa7a1..0c1876b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } if (unlikely(address == -1)) - address = bad_dma_address; + address = DMA_ERROR_CODE; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -1545,7 +1545,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, pte = dma_ops_get_pte(dom, address); if (!pte) - return bad_dma_address; + return DMA_ERROR_CODE; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1626,7 +1626,7 @@ static dma_addr_t __map_single(struct device *dev, retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) { + if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the @@ -1647,7 +1647,7 @@ retry: start = address; for (i = 0; i < pages; ++i) { ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); - if (ret == bad_dma_address) + if (ret == DMA_ERROR_CODE) goto out_unmap; paddr += PAGE_SIZE; @@ -1675,7 +1675,7 @@ out_unmap: dma_ops_free_addresses(dma_dom, address, pages); - return bad_dma_address; + return DMA_ERROR_CODE; } /* @@ -1691,7 +1691,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == bad_dma_address) || + if ((dma_addr == DMA_ERROR_CODE) || (dma_addr + size > dma_dom->aperture_size)) return; @@ -1733,7 +1733,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); if (!check_device(dev)) - return bad_dma_address; + return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; @@ -1744,12 +1744,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return (dma_addr_t)paddr; if (!dma_ops_domain(domain)) - return bad_dma_address; + return DMA_ERROR_CODE; spin_lock_irqsave(&domain->lock, flags); 
addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, dma_mask); - if (addr == bad_dma_address) + if (addr == DMA_ERROR_CODE) goto out; iommu_completion_wait(iommu); @@ -1958,7 +1958,7 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { + if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); goto out_free; } @@ -2120,8 +2120,7 @@ int __init amd_iommu_init_dma_ops(void) prealloc_protection_domains(); iommu_detected = 1; - force_iommu = 1; - bad_dma_address = 0; + swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3925adf..642793e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * definitions for the ACPI scanning code @@ -1183,19 +1184,10 @@ static struct sys_device device_amd_iommu = { * functions. Finally it prints some information about AMD IOMMUs and * the driver state and enables the hardware. */ -int __init amd_iommu_init(void) +static int __init amd_iommu_init(void) { int i, ret = 0; - - if (no_iommu) { - printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); - return 0; - } - - if (!amd_iommu_detected) - return -ENODEV; - /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. Upon this information the shared data @@ -1310,6 +1302,7 @@ int __init amd_iommu_init(void) else printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + x86_platform.iommu_shutdown = disable_iommus; out: return ret; @@ -1338,11 +1331,6 @@ free: goto out; } -void amd_iommu_shutdown(void) -{ - disable_iommus(); -} - /**************************************************************************** * * Early detect code. 
This code runs at IOMMU detection time in the DMA @@ -1357,16 +1345,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; amd_iommu_detected = 1; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif + x86_init.iommu.iommu_init = amd_iommu_init; } } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 082089e..8d34362 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -28,6 +28,7 @@ #include #include #include +#include int gart_iommu_aperture; int gart_iommu_aperture_disabled __initdata; @@ -401,6 +402,7 @@ void __init gart_iommu_hole_init(void) iommu_detected = 1; gart_iommu_aperture = 1; + x86_init.iommu.iommu_init = gart_iommu_init; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); @@ -469,7 +471,7 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (swiotlb && !valid_agp) { + } else if (!valid_agp) { /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0da6495..42d1fe2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -63,7 +63,12 @@ #include #include +#include #include +#include +#include + +#include #define __apicdebuginit(type) static type __init #define for_each_irq_pin(entry, head) \ @@ -390,14 +395,18 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector) static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic; + + io_apic = io_apic_base(apic); writel(reg, &io_apic->index); return readl(&io_apic->data); } static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic; + + io_apic = io_apic_base(apic); writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -410,7 +419,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i */ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic; + + io_apic = io_apic_base(apic); if (sis_apic_bug) writel(reg, &io_apic->index); @@ -3487,6 +3498,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; + if (xen_pv_domain()) + return xen_pci_setup_msi_irqs(dev, nvec, type); + node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; @@ -3536,7 +3550,29 @@ error: void arch_teardown_msi_irq(unsigned int irq) { - destroy_irq(irq); + if (xen_domain()) + xen_pci_teardown_msi_irq(irq); + else + destroy_irq(irq); +} + +void arch_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + /* If we are a non-privileged PV domain, we have + * to call xen_teardown_msi_dev first.
*/ + if (xen_domain()) + xen_pci_teardown_msi_dev(dev); + + list_for_each_entry(entry, &dev->msi_list, list) { + int i, nvec; + if (entry->irq == 0) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + arch_teardown_msi_irq(entry->irq + i); + } } #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) @@ -3852,7 +3888,14 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } +int get_nr_irqs_gsi(void) +{ + return nr_irqs_gsi; +} + #ifdef CONFIG_SPARSE_IRQ +int nr_dynamic_irqs; + int __init arch_probe_nr_irqs(void) { int nr; @@ -3870,6 +3913,8 @@ int __init arch_probe_nr_irqs(void) if (nr < nr_irqs) nr_irqs = nr; + nr_irqs += nr_dynamic_irqs; + return 0; } #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6..d1e6e60 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -558,6 +558,9 @@ void arch_trigger_all_cpu_backtrace(void) { int i; + if (!cpu_has_apic) + return; + cpumask_copy(&backtrace_mask, cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index f4361b5..404e458 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -1,3 +1,4 @@ obj-y := main.o if.o generic.o state.o cleanup.o obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o +obj-$(CONFIG_XEN_DOM0) += xen.o diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 33af141..378f8dc 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -108,6 +108,11 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) return 0; } +static int amd_num_var_ranges(void) +{ + return 2; +} + static struct mtrr_ops amd_mtrr_ops = { .vendor = X86_VENDOR_AMD, .set = amd_set_mtrr, @@ -115,6 +120,7 @@ static struct mtrr_ops amd_mtrr_ops = { .get_free_region = generic_get_free_region, .validate_add_page = amd_validate_add_page, .have_wrcomb = positive_have_wrcomb, + .num_var_ranges = amd_num_var_ranges, }; int __init amd_init_mtrr(void) diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index de89f14..7c686a0 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -110,6 +110,11 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t return 0; } +static int centaur_num_var_ranges(void) +{ + return 8; +} + static struct mtrr_ops centaur_mtrr_ops = { .vendor = X86_VENDOR_CENTAUR, .set = centaur_set_mcr, @@ -117,6 +122,7 @@ static struct mtrr_ops centaur_mtrr_ops = { .get_free_region = centaur_get_free_region, .validate_add_page = centaur_validate_add_page, .have_wrcomb = positive_have_wrcomb, + .num_var_ranges = centaur_num_var_ranges, }; int __init centaur_init_mtrr(void) diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 228d982..fd6edcc 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -265,6 +265,11 @@ static void cyrix_set_all(void) post_set(); } +static int cyrix_num_var_ranges(void) +{ + return 8; +} + static struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, .set_all = cyrix_set_all, @@ -273,6 +278,7 @@ static struct mtrr_ops cyrix_mtrr_ops = { .get_free_region = cyrix_get_free_region, .validate_add_page = generic_validate_add_page, .have_wrcomb = positive_have_wrcomb, + .num_var_ranges = cyrix_num_var_ranges, }; int __init 
cyrix_init_mtrr(void) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55da0c5..42f30cd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -749,8 +749,16 @@ int positive_have_wrcomb(void) return 1; } -/* - * Generic structure... +static int generic_num_var_ranges(void) +{ + unsigned long config = 0, dummy; + + rdmsr(MSR_MTRRcap, config, dummy); + + return config & 0xff; +} + +/* generic structure... */ struct mtrr_ops generic_mtrr_ops = { .use_intel_if = 1, @@ -760,4 +768,5 @@ struct mtrr_ops generic_mtrr_ops = { .set = generic_set_mtrr, .validate_add_page = generic_validate_add_page, .have_wrcomb = generic_have_wrcomb, + .num_var_ranges = generic_num_var_ranges, }; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 84e83de..c8cb9ed 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -110,21 +110,6 @@ static int have_wrcomb(void) return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; } -/* This function returns the number of variable MTRRs */ -static void __init set_num_var_ranges(void) -{ - unsigned long config = 0, dummy; - - if (use_intel()) - rdmsr(MSR_MTRRcap, config, dummy); - else if (is_cpu(AMD)) - config = 2; - else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) - config = 8; - - num_var_ranges = config & 0xff; -} - static void __init init_table(void) { int i, max; @@ -711,8 +696,11 @@ void __init mtrr_bp_init(void) } } + /* Let Xen code override the above if it wants */ + xen_init_mtrr(); + if (mtrr_if) { - set_num_var_ranges(); + num_var_ranges = mtrr_if->num_var_ranges(); init_table(); if (use_intel()) { get_mtrr_state(); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index a501dee..98569c3 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -5,6 +5,8 @@ #include #include +#include + #define MTRR_CHANGE_MASK_FIXED 0x01 #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 @@ -25,6 +27,8 @@ struct mtrr_ops { int (*validate_add_page)(unsigned long base, unsigned long size, unsigned int type); int (*have_wrcomb)(void); + + int (*num_var_ranges)(void); }; extern int generic_get_free_region(unsigned long base, unsigned long size, @@ -73,6 +77,13 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned); int amd_init_mtrr(void); int cyrix_init_mtrr(void); int centaur_init_mtrr(void); +#ifdef CONFIG_XEN_DOM0 +void xen_init_mtrr(void); +#else +static inline void xen_init_mtrr(void) +{ +} +#endif extern int changed_by_mtrr_cleanup; extern int mtrr_cleanup(unsigned address_bits); diff --git a/arch/x86/kernel/cpu/mtrr/xen.c b/arch/x86/kernel/cpu/mtrr/xen.c new file mode 100644 index 0000000..852018b --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/xen.c @@ -0,0 +1,109 @@ +#include +#include + +#include + +#include "mtrr.h" + +#include +#include +#include +#include + +static void xen_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct xen_platform_op op; + int error; + + /* mtrr_ops->set() is called once per CPU, + * but Xen's ops apply to all CPUs. 
+ */ + if (smp_processor_id()) + return; + + if (size == 0) { + op.cmd = XENPF_del_memtype; + op.u.del_memtype.handle = 0; + op.u.del_memtype.reg = reg; + } else { + op.cmd = XENPF_add_memtype; + op.u.add_memtype.mfn = base; + op.u.add_memtype.nr_mfns = size; + op.u.add_memtype.type = type; + } + + error = HYPERVISOR_dom0_op(&op); + BUG_ON(error != 0); +} + +static void xen_get_mtrr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type *type) +{ + struct xen_platform_op op; + + op.cmd = XENPF_read_memtype; + op.u.read_memtype.reg = reg; + if (HYPERVISOR_dom0_op(&op) != 0) { + *base = 0; + *size = 0; + *type = 0; + return; + } + + *size = op.u.read_memtype.nr_mfns; + *base = op.u.read_memtype.mfn; + *type = op.u.read_memtype.type; +} + +static int __init xen_num_var_ranges(void) +{ + int ranges; + struct xen_platform_op op; + + op.cmd = XENPF_read_memtype; + + for (ranges = 0; ; ranges++) { + op.u.read_memtype.reg = ranges; + if (HYPERVISOR_dom0_op(&op) != 0) + break; + } + return ranges; +} + +/* + * DOM0 TODO: Need to fill in the remaining mtrr methods to have full + * working userland mtrr support. + */ +static struct mtrr_ops xen_mtrr_ops = { + .vendor = X86_VENDOR_UNKNOWN, + .get_free_region = generic_get_free_region, + .set = xen_set_mtrr, + .get = xen_get_mtrr, + .have_wrcomb = positive_have_wrcomb, + .validate_add_page = generic_validate_add_page, + .use_intel_if = 0, + .num_var_ranges = xen_num_var_ranges, +}; + +void __init xen_init_mtrr(void) +{ + /* + * Check that we're running under Xen, and privileged enough + * to play with MTRRs. + */ + if (!xen_initial_domain()) + return; + + /* + * Check that the CPU has an MTRR implementation we can + * support. + */ + if (cpu_has_mtrr || + cpu_has_k6_mtrr || + cpu_has_cyrix_arr || + cpu_has_centaur_mcr) { + mtrr_if = &xen_mtrr_ops; + pat_init(); + } +} diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ff95824..ebd4c51 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,7 +28,6 @@ #include #include - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct die_args *args) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d17d482..4d0aded 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -750,6 +750,36 @@ static int __init find_overlapped_early(u64 start, u64 end) return i; } +u64 __init early_res_next_free(u64 addr) +{ + int i; + u64 end = addr; + struct early_res *r; + + for (i = 0; i < MAX_EARLY_RES; i++) { + r = &early_res[i]; + if (addr >= r->start && addr < r->end) { + end = r->end; + break; + } + } + return end; +} + +u64 __init early_res_next_reserved(u64 addr, u64 max) +{ + int i; + struct early_res *r; + u64 next_res = max; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if ((r->start >= addr) && (r->start < next_res)) + next_res = r->start; + } + return next_res; +} + /* * Drop the i-th range from the early reservation map, * by copying any higher ranges down one over it, and diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d..7764118 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1088,6 +1088,9 @@ ENTRY(xen_failsafe_callback) .previous ENDPROC(xen_failsafe_callback) +BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, + xen_evtchn_do_upcall) + #endif /* CONFIG_XEN */ #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 
b5c061f..a626344 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1364,6 +1364,9 @@ ENTRY(xen_failsafe_callback) CFI_ENDPROC END(xen_failsafe_callback) +apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ + xen_hvm_callback_vector xen_evtchn_do_upcall + #endif /* CONFIG_XEN */ /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 19528ef..40e47cd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -98,7 +98,7 @@ static int __init hpet_setup(char *str) } __setup("hpet=", hpet_setup); -static int __init disable_hpet(char *str) +int __init disable_hpet(char *str) { boot_hpet_disable = 1; return 1; diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 99c4d30..919c1a8 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -30,13 +30,29 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base, } } +void native_set_io_bitmap(struct thread_struct *t, + unsigned long bytes_updated) +{ + struct tss_struct *tss; + + if (!bytes_updated) + return; + + tss = &__get_cpu_var(init_tss); + + /* Update the TSS: */ + if (t->io_bitmap_ptr) + memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + else + memset(tss->io_bitmap, 0xff, bytes_updated); +} + /* * this changes the io permissions bitmap in the current task. */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct *t = &current->thread; - struct tss_struct *tss; unsigned int i, max_long, bytes, bytes_updated; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) @@ -61,13 +77,13 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) } /* - * do it in the per-thread copy and in the TSS ... + * do it in the per-thread copy * - * Disable preemption via get_cpu() - we must not switch away + * Disable preemption - we must not switch away * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + preempt_disable(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -85,10 +101,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_max = bytes; - /* Update the TSS: */ - memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + set_io_bitmap(t, bytes_updated); - put_cpu(); + preempt_enable(); return 0; } @@ -119,11 +134,10 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) return 0; } -#ifdef CONFIG_X86_32 -long sys_iopl(struct pt_regs *regs) +asmlinkage long sys_iopl(unsigned int level) { - unsigned int level = regs->bx; struct thread_struct *t = &current->thread; + struct pt_regs *regs = task_pt_regs(current); int rc; rc = do_iopl(level, regs); @@ -135,9 +149,3 @@ long sys_iopl(struct pt_regs *regs) out: return rc; } -#else -asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) -{ - return do_iopl(level, regs); -} -#endif diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ec6ef60..fa5b061 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -109,6 +109,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mutex_init(&mm->context.lock); mm->context.size = 0; +#ifdef CONFIG_XEN + mm->context.has_foreign_mappings = 0; +#endif old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { mutex_lock(&old_mm->context.lock); diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8..86ca771 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -81,6 +81,8 @@
#include #include +#include +#include #include #include @@ -503,7 +505,9 @@ static int __init microcode_init(void) struct cpuinfo_x86 *c = &cpu_data(0); int error; - if (c->x86_vendor == X86_VENDOR_INTEL) + if (xen_pv_domain()) + microcode_ops = init_xen_microcode(); + else if (c->x86_vendor == X86_VENDOR_INTEL) microcode_ops = init_intel_microcode(); else if (c->x86_vendor == X86_VENDOR_AMD) microcode_ops = init_amd_microcode(); diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c new file mode 100644 index 0000000..16c742e --- /dev/null +++ b/arch/x86/kernel/microcode_xen.c @@ -0,0 +1,201 @@ +/* + * Xen microcode update driver + * + * Xen does most of the work here. We just pass the whole blob into + * Xen, and it will apply it to all CPUs as appropriate. Xen will + * worry about how different CPU models are actually updated. + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +MODULE_DESCRIPTION("Xen microcode update driver"); +MODULE_LICENSE("GPL"); + +struct xen_microcode { + size_t len; + char data[0]; +}; + +static int xen_microcode_update(int cpu) +{ + int err; + struct xen_platform_op op; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct xen_microcode *uc = uci->mc; + + if (uc == NULL || uc->len == 0) { + /* + * We do all cpus at once, so we don't need to do + * other cpus explicitly (besides, these vcpu numbers + * have no relationship to underlying physical cpus). + */ + return 0; + } + + op.cmd = XENPF_microcode_update; + set_xen_guest_handle(op.u.microcode.data, uc->data); + op.u.microcode.length = uc->len; + + err = HYPERVISOR_dom0_op(&op); + + if (err != 0) + printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err); + + return err; +} + +static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device) +{ + char name[30]; + struct cpuinfo_x86 *c = &cpu_data(cpu); + const struct firmware *firmware; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ret; + struct xen_microcode *uc; + size_t size; + int err; + + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x", + c->x86, c->x86_model, c->x86_mask); + break; + + case X86_VENDOR_AMD: + snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin"); + break; + + default: + return UCODE_NFOUND; + } + + err = request_firmware(&firmware, name, device); + if (err) { + pr_debug("microcode: data file %s load failed\n", name); + return UCODE_NFOUND; + } + + /* + * Only bother getting real firmware for cpu 0; the others get + * dummy placeholders. 
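microcode_xen.c keeps whatever it fetched in a length-prefixed blob (struct xen_microcode) whose payload lives in a trailing flexible array, and it attaches a real payload only to CPU 0, since the hypervisor applies one blob to every physical CPU. Below is a hedged userspace sketch of that allocation pattern; blob_for_cpu() and the fake_fw array standing in for request_firmware() are made up for the example.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Length-prefixed blob, as in struct xen_microcode above; data[] is a
     * flexible array member, so one allocation holds header and payload. */
    struct ucode_blob {
        size_t len;
        char data[];
    };

    /* Hypothetical stand-in for request_firmware(): a static byte string. */
    static const char fake_fw[] = "not-a-real-microcode-image";

    static struct ucode_blob *blob_for_cpu(int cpu)
    {
        /* Only CPU 0 carries the real payload; the others get a
         * zero-length placeholder, mirroring the driver's policy. */
        size_t size = (cpu == 0) ? sizeof(fake_fw) : 0;
        struct ucode_blob *uc = malloc(sizeof(*uc) + size);

        if (!uc)
            return NULL;
        uc->len = size;
        memcpy(uc->data, fake_fw, size);
        return uc;
    }

    int main(void)
    {
        for (int cpu = 0; cpu < 4; cpu++) {
            struct ucode_blob *uc = blob_for_cpu(cpu);
            printf("cpu%d: %zu byte payload\n", cpu, uc ? uc->len : (size_t)0);
            free(uc);
        }
        return 0;
    }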
+ */ + if (cpu == 0) + size = firmware->size; + else + size = 0; + + if (uci->mc != NULL) { + vfree(uci->mc); + uci->mc = NULL; + } + + ret = UCODE_ERROR; + uc = vmalloc(sizeof(*uc) + size); + if (uc == NULL) + goto out; + + ret = UCODE_OK; + uc->len = size; + memcpy(uc->data, firmware->data, uc->len); + + uci->mc = uc; + +out: + release_firmware(firmware); + + return ret; +} + +static enum ucode_state xen_request_microcode_user(int cpu, + const void __user *buf, size_t size) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct xen_microcode *uc; + enum ucode_state ret; + size_t unread; + + if (cpu != 0) { + /* No real firmware for non-zero cpus; just store a + placeholder */ + size = 0; + } + + if (uci->mc != NULL) { + vfree(uci->mc); + uci->mc = NULL; + } + + ret = UCODE_ERROR; + uc = vmalloc(sizeof(*uc) + size); + if (uc == NULL) + goto out; + + uc->len = size; + + ret = UCODE_NFOUND; + + /* XXX This sporadically returns uncopied bytes, so we return + EFAULT. As far as I can see, the usermode code + (microcode_ctl) isn't doing anything wrong... */ + unread = copy_from_user(uc->data, buf, size); + + if (unread != 0) { + printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n", + unread, size, buf, uc->data); + goto out; + } + + ret = UCODE_OK; + +out: + if (ret == UCODE_OK) + uci->mc = uc; + else + vfree(uc); + + return ret; +} + +static void xen_microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + vfree(uci->mc); + uci->mc = NULL; +} + +static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig) +{ + sig->sig = 0; + sig->pf = 0; + sig->rev = 0; + + return 0; +} + +static struct microcode_ops microcode_xen_ops = { + .request_microcode_user = xen_request_microcode_user, + .request_microcode_fw = xen_request_microcode_fw, + .collect_cpu_info = xen_collect_cpu_info, + .apply_microcode = xen_microcode_update, + .microcode_fini_cpu = xen_microcode_fini_cpu, +}; + +struct microcode_ops * __init init_xen_microcode(void) +{ + if (!xen_initial_domain()) + return NULL; + return &microcode_xen_ops; +} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b1739d..f7e115c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -376,6 +376,7 @@ struct pv_cpu_ops pv_cpu_ops = { .swapgs = native_swapgs, .set_iopl_mask = native_set_iopl_mask, + .set_io_bitmap = native_set_io_bitmap, .io_delay = native_io_delay, .start_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1a2d4b1..2f158a5 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -46,6 +46,7 @@ #include #include #include +#include #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; @@ -249,7 +250,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return bad_dma_address; + return DMA_ERROR_CODE; } } @@ -265,11 +266,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = bad_dma_address; + dma_addr_t ret = DMA_ERROR_CODE; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == bad_dma_address)) + if (unlikely(entry == DMA_ERROR_CODE)) goto error; /* set the return dma address */ @@ -284,7 +285,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, error: printk(KERN_WARNING "Calgary: failed to
allocate %u pages in " "iommu %p\n", npages, tbl); - return bad_dma_address; + return DMA_ERROR_CODE; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, @@ -295,8 +296,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; /* were we called with bad_dma_address? */ - badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { + badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); + if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; @@ -380,7 +381,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == bad_dma_address) { + if (entry == DMA_ERROR_CODE) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -398,7 +399,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { - sg->dma_address = bad_dma_address; + sg->dma_address = DMA_ERROR_CODE; sg->dma_length = 0; } return 0; @@ -453,7 +454,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) + if (mapping == DMA_ERROR_CODE) goto free; *dma_handle = mapping; return ret; @@ -734,7 +735,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) struct iommu_table *tbl = pci_iommu(dev->bus); /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); + iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ @@ -1349,6 +1350,23 @@ static void __init get_tce_space_from_tar(void) return; } +static int __init calgary_iommu_init(void) +{ + int ret; + + /* ok, we're trying to use Calgary - let's roll */ + printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); + + ret = calgary_init(); + if (ret) { + printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " + "falling back to no_iommu\n", ret); + return ret; + } + + return 0; +} + void __init detect_calgary(void) { int bus; @@ -1362,7 +1380,7 @@ void __init detect_calgary(void) * if the user specified iommu=off or iommu=soft or we found * another HW IOMMU already, bail out. */ - if (swiotlb || no_iommu || iommu_detected) + if (no_iommu || iommu_detected) return; if (!use_calgary) @@ -1447,9 +1465,7 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", specified_table_size); - /* swiotlb for devices that aren't behind the Calgary. 
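The Calgary hunks above convert the driver from the old global bad_dma_address to the shared DMA_ERROR_CODE constant: a mapping routine hands back the poison value on failure, and callers must compare against it (or go through a .mapping_error hook, as the GART code gains further down) before using the handle. A small sketch of the convention, with an invented map_page() that fails for addresses above 4GB:

    #include <stdio.h>

    typedef unsigned long long dma_addr_t;

    /* x86 convention after this series: 0 is the poison value. */
    #define DMA_ERROR_CODE ((dma_addr_t)0)

    /* Hypothetical mapper: refuses anything past a 32-bit "DMA mask". */
    static dma_addr_t map_page(dma_addr_t phys, size_t size)
    {
        if (phys + size - 1 > 0xffffffffULL)
            return DMA_ERROR_CODE;   /* caller must check, never use this */
        return phys;
    }

    static int mapping_error(dma_addr_t addr)
    {
        return addr == DMA_ERROR_CODE;
    }

    int main(void)
    {
        dma_addr_t ok  = map_page(0x1000, 4096);
        dma_addr_t bad = map_page(0x1ffffffffULL, 4096);

        printf("ok:  %s\n", mapping_error(ok)  ? "failed" : "mapped");
        printf("bad: %s\n", mapping_error(bad) ? "failed" : "mapped");
        return 0;
    }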
*/ - if (max_pfn > MAX_DMA32_PFN) - swiotlb = 1; + x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1462,35 +1478,6 @@ cleanup: } } -int __init calgary_iommu_init(void) -{ - int ret; - - if (no_iommu || (swiotlb && !calgary_detected)) - return -ENODEV; - - if (!calgary_detected) - return -ENODEV; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - force_iommu = 1; - bad_dma_address = 0x0; - /* dma_ops is set to swiotlb or nommu */ - if (!dma_ops) - dma_ops = &nommu_dma_ops; - - return 0; -} - static int __init calgary_parse_options(char *p) { unsigned int bridge; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 6ac3931..3e57c58 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,10 +11,12 @@ #include #include #include +#include +#include static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops; +struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -42,9 +44,6 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; -dma_addr_t bad_dma_address __read_mostly = 0; -EXPORT_SYMBOL(bad_dma_address); - /* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", @@ -126,18 +125,19 @@ void __init pci_iommu_alloc(void) /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif + if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) + goto out; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ gart_iommu_hole_init(); detect_calgary(); detect_intel_iommu(); + /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); +out: + pci_xen_swiotlb_init(); pci_swiotlb_init(); } @@ -289,25 +289,17 @@ static int __init pci_iommu_init(void) #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif + x86_init.iommu.iommu_init(); - calgary_iommu_init(); - - intel_iommu_init(); - - amd_iommu_init(); + if (swiotlb || xen_swiotlb) { + printk(KERN_INFO "PCI-DMA: " + "Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } else + swiotlb_free(); - gart_iommu_init(); - - no_iommu_init(); return 0; } - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); - - amd_iommu_shutdown(); -} /* Must execute after PCI subsystem */ rootfs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1c76691..8c9dd05 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -39,6 +39,7 @@ #include #include #include +#include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +static dma_addr_t bad_dma_addr; + /* * If this is disabled the IOMMU will use an optimized flushing strategy * of only flushing when an mapping is reused. 
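Rather than pci_iommu_init() calling every IOMMU's init function and trusting each one to bail out, the rework above lets each boot-time detect routine publish its initializer through the single x86_init.iommu.iommu_init hook; detect_calgary() storing calgary_iommu_init there is one instance. The sketch below compresses the pattern into plain C; the structure is flattened and the probe result is faked, so the names and layout are illustrative only.

    #include <stdio.h>

    static int no_iommu_init(void) { puts("no hw iommu"); return 0; }
    static int calgary_init(void)  { puts("calgary iommu"); return 0; }

    /* Single override point, defaulting to the no-op implementation. */
    static struct {
        int (*iommu_init)(void);
    } x86_init = { .iommu_init = no_iommu_init };

    static int iommu_detected;

    /* Each detect routine claims the hook at most once. */
    static void detect_calgary(void)
    {
        int hw_present = 1;   /* pretend the hardware probe succeeded */

        if (iommu_detected || !hw_present)
            return;
        iommu_detected = 1;
        x86_init.iommu_init = calgary_init;
    }

    int main(void)
    {
        detect_calgary();              /* boot-time probing... */
        return x86_init.iommu_init();  /* ...then one indirect init call */
    }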
With it true the GART is @@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); - return bad_dma_address; + return bad_dma_addr; } for (i = 0; i < npages; i++) { @@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_address) { + if (addr == bad_dma_addr) { if (i > 0) gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; @@ -455,7 +458,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_address; + s->dma_address = bad_dma_addr; return 0; } @@ -479,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, DMA_BIDIRECTIONAL, align_mask); flush_gart(); - if (paddr != bad_dma_address) { + if (paddr != bad_dma_addr) { *dma_addr = paddr; return page_address(page); } @@ -499,6 +502,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return (dma_addr == bad_dma_addr); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -689,14 +697,15 @@ static struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc_coherent = gart_alloc_coherent, .free_coherent = gart_free_coherent, + .mapping_error = gart_mapping_error, }; -void gart_iommu_shutdown(void) +static void gart_iommu_shutdown(void) { struct pci_dev *dev; int i; - if (no_agp && (dma_ops != &gart_dma_ops)) + if (no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { @@ -711,7 +720,7 @@ void gart_iommu_shutdown(void) } } -void __init gart_iommu_init(void) +int __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; @@ -721,7 +730,7 @@ void __init gart_iommu_init(void) long i; if (num_k8_northbridges == 0) - return; + return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; @@ -733,13 +742,6 @@ void __init gart_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) - return; - - /* Did we detect a different HW IOMMU? 
*/ - if (iommu_detected && !gart_iommu_aperture) - return; - if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || @@ -749,7 +751,7 @@ void __init gart_iommu_init(void) "but GART IOMMU not available.\n"); printk(KERN_WARNING "falling back to iommu=soft.\n"); } - return; + return 0; } /* need to map that range */ @@ -794,7 +796,7 @@ void __init gart_iommu_init(void) iommu_start = aper_size - iommu_size; iommu_bus_base = info.aper_base + iommu_start; - bad_dma_address = iommu_bus_base; + bad_dma_addr = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* @@ -841,6 +843,10 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; + x86_platform.iommu_shutdown = gart_iommu_shutdown; + swiotlb = 0; + + return 0; } void __init gart_parse_options(char *p) diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4..22be12b 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return bad_dma_address; + return DMA_ERROR_CODE; flush_write_buffers(); return bus; } @@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, }; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b78..7d2829d 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,18 +42,31 @@ static struct dma_map_ops swiotlb_dma_ops = { .dma_supported = NULL, }; -void __init pci_swiotlb_init(void) +/* + * pci_swiotlb_detect - set swiotlb to 1 if necessary + * + * This returns non-zero if we are forced to use swiotlb (by the boot + * option). + */ +int __init pci_swiotlb_detect(void) { + int use_swiotlb = swiotlb | swiotlb_force; + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif if (swiotlb_force) swiotlb = 1; + + return use_swiotlb; +} + +void __init pci_swiotlb_init(void) +{ if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); + swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5fd5b07..11d8667 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -73,16 +73,12 @@ void exit_thread(void) unsigned long *bp = t->io_bitmap_ptr; if (bp) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); - + preempt_disable(); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, t->io_bitmap_max); + set_io_bitmap(t, t->io_bitmap_max); t->io_bitmap_max = 0; - put_cpu(); + preempt_enable(); kfree(bp); } } @@ -199,19 +195,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { - /* - * Copy the relevant range of the IO bitmap. 
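pci_swiotlb_detect() above decides the bounce buffer two ways: it switches swiotlb on when memory reaches past 4GB and iommu=off was not given (or when swiotlb=force is set), and it returns whether swiotlb had already been requested on entry so pci_iommu_alloc() can skip probing hardware IOMMUs. A standalone sketch of that decision logic, with max_pfn passed in as a parameter for testability:

    #include <stdio.h>

    #define MAX_DMA32_PFN (0x100000000ULL >> 12)   /* first PFN above 4GB */

    static int swiotlb, swiotlb_force, no_iommu;

    /* Mirrors the hunk above: enable the bounce buffer when memory
     * extends past 4GB (unless iommu=off) or when it is forced; return
     * non-zero only if swiotlb was already requested, so hardware IOMMU
     * detection can be skipped. */
    static int pci_swiotlb_detect(unsigned long long max_pfn)
    {
        int use_swiotlb = swiotlb | swiotlb_force;

        if (!no_iommu && max_pfn > MAX_DMA32_PFN)
            swiotlb = 1;
        if (swiotlb_force)
            swiotlb = 1;

        return use_swiotlb;
    }

    int main(void)
    {
        swiotlb_force = 1;   /* as if swiotlb=force were on the cmdline */
        printf("forced: %d, enabled after detect: %d\n",
               pci_swiotlb_detect(0x80000), swiotlb);
        return 0;
    }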
- * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) || + test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) + set_io_bitmap(next, + max(prev->io_bitmap_max, next->io_bitmap_max)); } int sys_fork(struct pt_regs *regs) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 269c2a3..8e1aac8 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -23,7 +23,7 @@ # include # include #else -# include +# include #endif /* @@ -647,7 +647,7 @@ void native_machine_shutdown(void) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d7a0888..594e324 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -70,6 +70,7 @@ #include #include
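Taken together, the ioport.c and process.c hunks funnel every TSS I/O-bitmap update through one set_io_bitmap() operation: copy the thread's bitmap in when it has one, or fill the TSS copy with 0xff (all ports denied) when it does not. That folds the old copy and clear branches of __switch_to_xtra() into a single call that a paravirt backend such as Xen can interpose on. A userspace sketch of the helper's semantics, with a plain array standing in for the TSS:

    #include <stdio.h>
    #include <string.h>

    #define IO_BITMAP_BYTES 128   /* enough for this sketch; the real TSS
                                   * bitmap covers 65536 ports (8192 bytes) */

    static unsigned char tss_io_bitmap[IO_BITMAP_BYTES];

    /* One helper covers both cases native_set_io_bitmap() handles above:
     * install a thread's bitmap, or deny everything when it has none. */
    static void set_io_bitmap(const unsigned char *thread_bitmap,
                              size_t bytes_updated)
    {
        if (!bytes_updated)
            return;
        if (thread_bitmap)
            memcpy(tss_io_bitmap, thread_bitmap, bytes_updated);
        else
            memset(tss_io_bitmap, 0xff, bytes_updated);   /* 1 = denied */
    }

    int main(void)
    {
        unsigned char mine[IO_BITMAP_BYTES];

        memset(mine, 0xff, sizeof(mine));
        mine[0] = 0xf0;                        /* allow ports 0-3 */

        set_io_bitmap(mine, sizeof(mine));     /* task with a bitmap */
        printf("byte0 after install: %02x\n", tss_io_bitmap[0]);

        set_io_bitmap(NULL, sizeof(mine));     /* task without one */
        printf("byte0 after clear:   %02x\n", tss_io_bitmap[0]);
        return 0;
    }

Passing max(prev->io_bitmap_max, next->io_bitmap_max) at context switch, as the patch does, guarantees the update span covers both the bytes the new task needs installed and any leftovers from the old task.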