diff options
Diffstat (limited to 'arch/i386/kernel')
49 files changed, 1029 insertions, 973 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 4cc83b322b3..f10de0f2c5e 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -7,11 +7,11 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o + doublefault.o quirks.o i8237.o obj-y += cpu/ obj-y += timers/ -obj-$(CONFIG_ACPI_BOOT) += acpi/ +obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 5e291a20c03..267ca48e1b6 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_ACPI_BOOT) := boot.o +obj-y := boot.o obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index b7808a89d94..a63351c085c 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -40,19 +40,25 @@ #ifdef CONFIG_X86_64 -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { } +static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +} extern void __init clustered_apic_check(void); -static inline int ioapic_setup_disabled(void) { return 0; } +static inline int ioapic_setup_disabled(void) +{ + return 0; +} + #include <asm/proto.h> -#else /* X86 */ +#else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC #include <mach_apic.h> #include <mach_mpparse.h> -#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* X86 */ +#endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ @@ -60,13 +66,8 @@ static inline int ioapic_setup_disabled(void) { return 0; } #define PREFIX "ACPI: " -#ifdef CONFIG_ACPI_PCI int acpi_noirq __initdata; /* skip ACPI IRQ initialization */ -int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ -#else -int acpi_noirq __initdata = 1; -int acpi_pci_disabled __initdata = 1; -#endif +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ int acpi_ht __initdata = 1; /* enable HT */ int acpi_lapic; @@ -88,7 +89,7 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #define MAX_MADT_ENTRIES 256 u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] = - { [0 ... MAX_MADT_ENTRIES-1] = 0xff }; + {[0 ... MAX_MADT_ENTRIES - 1] = 0xff }; EXPORT_SYMBOL(x86_acpiid_to_apicid); /* -------------------------------------------------------------------------- @@ -99,7 +100,7 @@ EXPORT_SYMBOL(x86_acpiid_to_apicid); * The default interrupt routing model is PIC (8259). This gets * overriden if IOAPICs are enumerated (below). */ -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; #ifdef CONFIG_X86_64 @@ -107,7 +108,7 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; char *__acpi_map_table(unsigned long phys_addr, unsigned long size) { if (!phys_addr || !size) - return NULL; + return NULL; if (phys_addr < (end_pfn_map << PAGE_SHIFT)) return __va(phys_addr); @@ -134,8 +135,8 @@ char *__acpi_map_table(unsigned long phys, unsigned long size) unsigned long base, offset, mapped_size; int idx; - if (phys + size < 8*1024*1024) - return __va(phys); + if (phys + size < 8 * 1024 * 1024) + return __va(phys); offset = phys & (PAGE_SIZE - 1); mapped_size = PAGE_SIZE - offset; @@ -154,7 +155,7 @@ char *__acpi_map_table(unsigned long phys, unsigned long size) mapped_size += PAGE_SIZE; } - return ((unsigned char *) base + offset); + return ((unsigned char *)base + offset); } #endif @@ -172,7 +173,7 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) if (!phys_addr || !size) return -EINVAL; - mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size); + mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size); if (!mcfg) { printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); return -ENODEV; @@ -209,20 +210,17 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) return 0; } -#endif /* CONFIG_PCI_MMCONFIG */ +#endif /* CONFIG_PCI_MMCONFIG */ #ifdef CONFIG_X86_LOCAL_APIC -static int __init -acpi_parse_madt ( - unsigned long phys_addr, - unsigned long size) +static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size) { - struct acpi_table_madt *madt = NULL; + struct acpi_table_madt *madt = NULL; if (!phys_addr || !size) return -EINVAL; - madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); + madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size); if (!madt) { printk(KERN_WARNING PREFIX "Unable to map MADT\n"); return -ENODEV; @@ -232,22 +230,20 @@ acpi_parse_madt ( acpi_lapic_addr = (u64) madt->lapic_address; printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", - madt->lapic_address); + madt->lapic_address); } acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); - + return 0; } - static int __init -acpi_parse_lapic ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) { - struct acpi_table_lapic *processor = NULL; + struct acpi_table_lapic *processor = NULL; - processor = (struct acpi_table_lapic*) header; + processor = (struct acpi_table_lapic *)header; if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; @@ -260,20 +256,19 @@ acpi_parse_lapic ( x86_acpiid_to_apicid[processor->acpi_id] = processor->id; - mp_register_lapic ( - processor->id, /* APIC ID */ - processor->flags.enabled); /* Enabled? */ + mp_register_lapic(processor->id, /* APIC ID */ + processor->flags.enabled); /* Enabled? */ return 0; } static int __init -acpi_parse_lapic_addr_ovr ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header, + const unsigned long end) { struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; - lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; + lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header; if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; @@ -284,12 +279,11 @@ acpi_parse_lapic_addr_ovr ( } static int __init -acpi_parse_lapic_nmi ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end) { struct acpi_table_lapic_nmi *lapic_nmi = NULL; - lapic_nmi = (struct acpi_table_lapic_nmi*) header; + lapic_nmi = (struct acpi_table_lapic_nmi *)header; if (BAD_MADT_ENTRY(lapic_nmi, end)) return -EINVAL; @@ -302,37 +296,32 @@ acpi_parse_lapic_nmi ( return 0; } +#endif /*CONFIG_X86_LOCAL_APIC */ -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC static int __init -acpi_parse_ioapic ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) { struct acpi_table_ioapic *ioapic = NULL; - ioapic = (struct acpi_table_ioapic*) header; + ioapic = (struct acpi_table_ioapic *)header; if (BAD_MADT_ENTRY(ioapic, end)) return -EINVAL; - + acpi_table_print_madt_entry(header); - mp_register_ioapic ( - ioapic->id, - ioapic->address, - ioapic->global_irq_base); - + mp_register_ioapic(ioapic->id, + ioapic->address, ioapic->global_irq_base); + return 0; } /* * Parse Interrupt Source Override for the ACPI SCI */ -static void -acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) +static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) { if (trigger == 0) /* compatible SCI trigger is level */ trigger = 3; @@ -348,7 +337,7 @@ acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) polarity = acpi_sci_flags.polarity; /* - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 + * mp_config_acpi_legacy_irqs() already setup IRQs < 16 * If GSI is < 16, this will update its flags, * else it will create a new mp_irqs[] entry. */ @@ -363,12 +352,12 @@ acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) } static int __init -acpi_parse_int_src_ovr ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_int_src_ovr(acpi_table_entry_header * header, + const unsigned long end) { struct acpi_table_int_src_ovr *intsrc = NULL; - intsrc = (struct acpi_table_int_src_ovr*) header; + intsrc = (struct acpi_table_int_src_ovr *)header; if (BAD_MADT_ENTRY(intsrc, end)) return -EINVAL; @@ -377,33 +366,30 @@ acpi_parse_int_src_ovr ( if (intsrc->bus_irq == acpi_fadt.sci_int) { acpi_sci_ioapic_setup(intsrc->global_irq, - intsrc->flags.polarity, intsrc->flags.trigger); + intsrc->flags.polarity, + intsrc->flags.trigger); return 0; } if (acpi_skip_timer_override && - intsrc->bus_irq == 0 && intsrc->global_irq == 2) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); - return 0; + intsrc->bus_irq == 0 && intsrc->global_irq == 2) { + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + return 0; } - mp_override_legacy_irq ( - intsrc->bus_irq, - intsrc->flags.polarity, - intsrc->flags.trigger, - intsrc->global_irq); + mp_override_legacy_irq(intsrc->bus_irq, + intsrc->flags.polarity, + intsrc->flags.trigger, intsrc->global_irq); return 0; } - static int __init -acpi_parse_nmi_src ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end) { struct acpi_table_nmi_src *nmi_src = NULL; - nmi_src = (struct acpi_table_nmi_src*) header; + nmi_src = (struct acpi_table_nmi_src *)header; if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; @@ -415,9 +401,7 @@ acpi_parse_nmi_src ( return 0; } -#endif /* CONFIG_X86_IO_APIC */ - -#ifdef CONFIG_ACPI_BUS +#endif /* CONFIG_X86_IO_APIC */ /* * acpi_pic_sci_set_trigger() @@ -433,8 +417,7 @@ acpi_parse_nmi_src ( * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) */ -void __init -acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) +void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) { unsigned int mask = 1 << irq; unsigned int old, new; @@ -454,10 +437,10 @@ acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) * routing tables.. */ switch (trigger) { - case 1: /* Edge - clear */ + case 1: /* Edge - clear */ new &= ~mask; break; - case 3: /* Level - set */ + case 3: /* Level - set */ new |= mask; break; } @@ -470,21 +453,22 @@ acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) outb(new >> 8, 0x4d1); } - -#endif /* CONFIG_ACPI_BUS */ - int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { #ifdef CONFIG_X86_IO_APIC if (use_pci_vector() && !platform_legacy_irq(gsi)) - *irq = IO_APIC_VECTOR(gsi); + *irq = IO_APIC_VECTOR(gsi); else #endif *irq = gsi; return 0; } -unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -497,7 +481,7 @@ unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) extern void eisa_set_level_irq(unsigned int irq); if (edge_level == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); + eisa_set_level_irq(gsi); } #endif @@ -509,60 +493,58 @@ unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) acpi_gsi_to_irq(plat_gsi, &irq); return irq; } + EXPORT_SYMBOL(acpi_register_gsi); /* * ACPI based hotplug support for CPU */ #ifdef CONFIG_ACPI_HOTPLUG_CPU -int -acpi_map_lsapic(acpi_handle handle, int *pcpu) +int acpi_map_lsapic(acpi_handle handle, int *pcpu) { /* TBD */ return -EINVAL; } -EXPORT_SYMBOL(acpi_map_lsapic); +EXPORT_SYMBOL(acpi_map_lsapic); -int -acpi_unmap_lsapic(int cpu) +int acpi_unmap_lsapic(int cpu) { /* TBD */ return -EINVAL; } + EXPORT_SYMBOL(acpi_unmap_lsapic); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ -int -acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { /* TBD */ return -EINVAL; } + EXPORT_SYMBOL(acpi_register_ioapic); -int -acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) { /* TBD */ return -EINVAL; } + EXPORT_SYMBOL(acpi_unregister_ioapic); static unsigned long __init -acpi_scan_rsdp ( - unsigned long start, - unsigned long length) +acpi_scan_rsdp(unsigned long start, unsigned long length) { - unsigned long offset = 0; - unsigned long sig_len = sizeof("RSD PTR ") - 1; + unsigned long offset = 0; + unsigned long sig_len = sizeof("RSD PTR ") - 1; /* * Scan all 16-byte boundaries of the physical memory region for the * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } @@ -575,20 +557,19 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) struct acpi_table_sbf *sb; if (!phys_addr || !size) - return -EINVAL; + return -EINVAL; - sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size); + sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size); if (!sb) { printk(KERN_WARNING PREFIX "Unable to map SBF\n"); return -ENODEV; } - sbf_port = sb->sbf_cmos; /* Save CMOS port */ + sbf_port = sb->sbf_cmos; /* Save CMOS port */ return 0; } - #ifdef CONFIG_HPET_TIMER static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) @@ -598,7 +579,7 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) if (!phys || !size) return -EINVAL; - hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size); + hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size); if (!hpet_tbl) { printk(KERN_WARNING PREFIX "Unable to map HPET\n"); return -ENODEV; @@ -609,22 +590,21 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) "memory.\n"); return -1; } - #ifdef CONFIG_X86_64 - vxtime.hpet_address = hpet_tbl->addr.addrl | - ((long) hpet_tbl->addr.addrh << 32); + vxtime.hpet_address = hpet_tbl->addr.addrl | + ((long)hpet_tbl->addr.addrh << 32); - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, vxtime.hpet_address); -#else /* X86 */ + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, vxtime.hpet_address); +#else /* X86 */ { extern unsigned long hpet_address; hpet_address = hpet_tbl->addr.addrl; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); + hpet_tbl->id, hpet_address); } -#endif /* X86 */ +#endif /* X86 */ return 0; } @@ -640,28 +620,25 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { struct fadt_descriptor_rev2 *fadt = NULL; - fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); - if(!fadt) { + fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size); + if (!fadt) { printk(KERN_WARNING PREFIX "Unable to map FADT\n"); return 0; } - -#ifdef CONFIG_ACPI_INTERPRETER /* initialize sci_int early for INT_SRC_OVR MADT parsing */ acpi_fadt.sci_int = fadt->sci_int; -#endif -#ifdef CONFIG_ACPI_BUS /* initialize rev and apic_phys_dest_mode for x86_64 genapic */ acpi_fadt.revision = fadt->revision; - acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode; -#endif + acpi_fadt.force_apic_physical_destination_mode = + fadt->force_apic_physical_destination_mode; #ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 2 */ - if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) + if (fadt->xpm_tmr_blk.address_space_id != + ACPI_ADR_SPACE_SYSTEM_IO) return 0; pmtmr_ioport = fadt->xpm_tmr_blk.address; @@ -670,16 +647,15 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) pmtmr_ioport = fadt->V1_pm_tmr_blk; } if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", + pmtmr_ioport); #endif return 0; } - -unsigned long __init -acpi_find_rsdp (void) +unsigned long __init acpi_find_rsdp(void) { - unsigned long rsdp_phys = 0; + unsigned long rsdp_phys = 0; if (efi_enabled) { if (efi.acpi20) @@ -691,9 +667,9 @@ acpi_find_rsdp (void) * Scan memory looking for the RSDP signature. First search EBDA (low * memory) paragraphs and then search upper memory (E0000-FFFFF). */ - rsdp_phys = acpi_scan_rsdp (0, 0x400); + rsdp_phys = acpi_scan_rsdp(0, 0x400); if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); + rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); return rsdp_phys; } @@ -703,8 +679,7 @@ acpi_find_rsdp (void) * Parse LAPIC entries in MADT * returns 0 on success, < 0 on error */ -static int __init -acpi_parse_madt_lapic_entries(void) +static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -713,28 +688,31 @@ acpi_parse_madt_lapic_entries(void) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). */ - count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0); + count = + acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, + acpi_parse_lapic_addr_ovr, 0); if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); return count; } mp_register_lapic_address(acpi_lapic_addr); count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic, - MAX_APICS); - if (!count) { + MAX_APICS); + if (!count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); /* TBD: Cleanup to allow fallback to MPS */ return -ENODEV; - } - else if (count < 0) { + } else if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ return count; } - count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); + count = + acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -742,15 +720,14 @@ acpi_parse_madt_lapic_entries(void) } return 0; } -#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* CONFIG_X86_LOCAL_APIC */ -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error */ -static int __init -acpi_parse_madt_ioapic_entries(void) +static int __init acpi_parse_madt_ioapic_entries(void) { int count; @@ -762,30 +739,34 @@ acpi_parse_madt_ioapic_entries(void) */ if (acpi_disabled || acpi_noirq) { return -ENODEV; - } + } /* - * if "noapic" boot option, don't look for IO-APICs + * if "noapic" boot option, don't look for IO-APICs */ if (skip_ioapic_setup) { printk(KERN_INFO PREFIX "Skipping IOAPIC probe " - "due to 'noapic' option.\n"); + "due to 'noapic' option.\n"); return -ENODEV; } - count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS); + count = + acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, + MAX_IO_APICS); if (!count) { printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); return -ENODEV; - } - else if (count < 0) { + } else if (count < 0) { printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); return count; } - count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS); + count = + acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, + NR_IRQ_VECTORS); if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); + printk(KERN_ERR PREFIX + "Error parsing interrupt source overrides entry\n"); /* TBD: Cleanup to allow fallback to MPS */ return count; } @@ -800,7 +781,9 @@ acpi_parse_madt_ioapic_entries(void) /* Fill in identity legacy mapings where no override */ mp_config_acpi_legacy_irqs(); - count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS); + count = + acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, + NR_IRQ_VECTORS); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -814,11 +797,9 @@ static inline int acpi_parse_madt_ioapic_entries(void) { return -1; } -#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */ - +#endif /* !CONFIG_X86_IO_APIC */ -static void __init -acpi_process_madt(void) +static void __init acpi_process_madt(void) { #ifdef CONFIG_X86_LOCAL_APIC int count, error; @@ -833,6 +814,9 @@ acpi_process_madt(void) if (!error) { acpi_lapic = 1; +#ifdef CONFIG_X86_GENERICARCH + generic_bigsmp_probe(); +#endif /* * Parse MADT IO-APIC entries */ @@ -850,7 +834,8 @@ acpi_process_madt(void) /* * Dell Precision Workstation 410, 610 come here. */ - printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n"); + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); disable_acpi(); } } @@ -862,7 +847,6 @@ extern int acpi_force; #ifdef __i386__ -#ifdef CONFIG_ACPI_PCI static int __init disable_acpi_irq(struct dmi_system_id *d) { if (!acpi_force) { @@ -882,12 +866,11 @@ static int __init disable_acpi_pci(struct dmi_system_id *d) } return 0; } -#endif static int __init dmi_disable_acpi(struct dmi_system_id *d) { if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); + printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); disable_acpi(); } else { printk(KERN_NOTICE @@ -902,7 +885,8 @@ static int __init dmi_disable_acpi(struct dmi_system_id *d) static int __init force_acpi_ht(struct dmi_system_id *d) { if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", + d->ident); disable_acpi(); acpi_ht = 1; } else { @@ -921,155 +905,155 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * Boxes that need ACPI disabled */ { - .callback = dmi_disable_acpi, - .ident = "IBM Thinkpad", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), - }, - }, + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, /* * Boxes that need acpi=ht */ { - .callback = force_acpi_ht, - .ident = "FSC Primergy T850", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), - }, - }, + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, { - .callback = force_acpi_ht, - .ident = "DELL GX240", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - }, - }, + .callback = force_acpi_ht, + .ident = "DELL GX240", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), + }, + }, { - .callback = force_acpi_ht, - .ident = "HP VISUALIZE NT Workstation", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), - }, - }, + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), + }, + }, { - .callback = force_acpi_ht, - .ident = "Compaq Workstation W8000", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), - }, - }, + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ASUS P4B266", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P4B266"), - }, - }, + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ASUS P2B-DS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), - }, - }, + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ASUS CUR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), - }, - }, + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ABIT i440BX-W83977", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), - DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), - }, - }, + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM Bladecenter", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), - }, - }, + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM eServer xSeries 360", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), - }, - }, + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM eserver xSeries 330", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), - }, - }, + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM eserver xSeries 440", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), - }, - }, - -#ifdef CONFIG_ACPI_PCI + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + /* * Boxes that need ACPI PCI IRQ routing disabled */ { - .callback = disable_acpi_irq, - .ident = "ASUS A7V", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - DMI_MATCH(DMI_BOARD_NAME, "<A7V>"), - /* newer BIOS, Revision 1011, does work */ - DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), - }, - }, + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, "<A7V>"), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, /* * Boxes that need ACPI PCI IRQ routing and PCI scan disabled */ - { /* _BBN 0 bug */ - .callback = disable_acpi_pci, - .ident = "ASUS PR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), - DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), - DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") - }, - }, + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, { - .callback = disable_acpi_pci, - .ident = "Acer TravelMate 36x Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - }, - }, -#endif - { } + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, + {} }; -#endif /* __i386__ */ +#endif /* __i386__ */ /* * acpi_boot_table_init() and acpi_boot_init() @@ -1094,8 +1078,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * !0: failure */ -int __init -acpi_boot_table_init(void) +int __init acpi_boot_table_init(void) { int error; @@ -1108,7 +1091,7 @@ acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return 1; /* * Initialize the ACPI boot-time table parser. @@ -1118,7 +1101,6 @@ acpi_boot_table_init(void) disable_acpi(); return error; } - #ifdef __i386__ check_acpi_pci(); #endif @@ -1142,7 +1124,6 @@ acpi_boot_table_init(void) return 0; } - int __init acpi_boot_init(void) { /* @@ -1150,7 +1131,7 @@ int __init acpi_boot_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return 1; acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); @@ -1168,4 +1149,3 @@ int __init acpi_boot_init(void) return 0; } - diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 726a5ca4b16..f1b9d2a46da 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -8,44 +8,44 @@ #include <asm/pci-direct.h> #include <asm/acpi.h> -static int __init check_bridge(int vendor, int device) +static int __init check_bridge(int vendor, int device) { /* According to Nvidia all timer overrides are bogus. Just ignore them all. */ - if (vendor == PCI_VENDOR_ID_NVIDIA) { - acpi_skip_timer_override = 1; + if (vendor == PCI_VENDOR_ID_NVIDIA) { + acpi_skip_timer_override = 1; } return 0; } - -void __init check_acpi_pci(void) -{ - int num,slot,func; + +void __init check_acpi_pci(void) +{ + int num, slot, func; /* Assume the machine supports type 1. If not it will always read ffffffff and should not have any side effect. */ /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { + for (num = 0; num < 32; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { u32 class; u32 vendor; - class = read_pci_config(num,slot,func, + class = read_pci_config(num, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; + break; if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) - continue; - - vendor = read_pci_config(num, slot, func, + continue; + + vendor = read_pci_config(num, slot, func, PCI_VENDOR_ID); - - if (check_bridge(vendor&0xffff, vendor >> 16)) - return; - } - + + if (check_bridge(vendor & 0xffff, vendor >> 16)) + return; + } + } } } diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index c1af93032ff..1cb2b186a3a 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -20,12 +20,13 @@ extern void zap_low_mappings(void); extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); -static void init_low_mapping(pgd_t *pgd, int pgd_limit) +static void init_low_mapping(pgd_t * pgd, int pgd_limit) { int pgd_ofs = 0; - while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { - set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD)); + while ((pgd_ofs < pgd_limit) + && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { + set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD)); pgd_ofs++, pgd++; } flush_tlb_all(); @@ -37,12 +38,13 @@ static void init_low_mapping(pgd_t *pgd, int pgd_limit) * Create an identity mapped page table and copy the wakeup routine to * low memory. */ -int acpi_save_state_mem (void) +int acpi_save_state_mem(void) { if (!acpi_wakeup_address) return 1; init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); - memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); acpi_copy_wakeup_routine(acpi_wakeup_address); return 0; @@ -51,7 +53,7 @@ int acpi_save_state_mem (void) /* * acpi_restore_state - undo effects of acpi_save_state_mem */ -void acpi_restore_state_mem (void) +void acpi_restore_state_mem(void) { zap_low_mappings(); } @@ -67,7 +69,8 @@ void acpi_restore_state_mem (void) void __init acpi_reserve_bootmem(void) { if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { - printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n"); + printk(KERN_ERR + "ACPI: Wakeup code way too big, S3 disabled.\n"); return; } @@ -90,10 +93,8 @@ static int __init acpi_sleep_setup(char *str) return 1; } - __setup("acpi_sleep=", acpi_sleep_setup); - static __init int reset_videomode_after_s3(struct dmi_system_id *d) { acpi_video_flags |= 2; @@ -101,14 +102,14 @@ static __init int reset_videomode_after_s3(struct dmi_system_id *d) } static __initdata struct dmi_system_id acpisleep_dmi_table[] = { - { /* Reset video mode after returning from ACPI S3 sleep */ - .callback = reset_videomode_after_s3, - .ident = "Toshiba Satellite 4030cdt", - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - }, - }, - { } + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + {} }; static int __init acpisleep_dmi_init(void) diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 4553ffd94b1..46ce9b248f5 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -613,8 +613,8 @@ void __devinit cpu_init(void) memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), GDT_ENTRY_TLS_ENTRIES * 8); - __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu])); - __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + load_gdt(&cpu_gdt_descr[cpu]); + load_idt(&idt_descr); /* * Delete NT @@ -642,12 +642,12 @@ void __devinit cpu_init(void) asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); /* Clear all 6 debug registers: */ - -#define CD(register) set_debugreg(0, register) - - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); - -#undef CD + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); /* * Force FPU initialization: diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 60a9e54dd20..822c8ce9d1f 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -31,6 +31,7 @@ #include <linux/cpufreq.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/compiler.h> #include <asm/io.h> #include <asm/delay.h> #include <asm/uaccess.h> @@ -57,6 +58,8 @@ static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; static struct cpufreq_driver acpi_cpufreq_driver; +static unsigned int acpi_pstate_strict; + static int acpi_processor_write_port( u16 port, @@ -163,34 +166,44 @@ acpi_processor_set_performance ( } /* - * Then we read the 'status_register' and compare the value with the - * target state's 'status' to make sure the transition was successful. - * Note that we'll poll for up to 1ms (100 cycles of 10us) before - * giving up. + * Assume the write went through when acpi_pstate_strict is not used. + * As read status_register is an expensive operation and there + * are no specific error cases where an IO port write will fail. */ - - port = data->acpi_data.status_register.address; - bit_width = data->acpi_data.status_register.bit_width; - - dprintk("Looking for 0x%08x from port 0x%04x\n", - (u32) data->acpi_data.states[state].status, port); - - for (i=0; i<100; i++) { - ret = acpi_processor_read_port(port, bit_width, &value); - if (ret) { - dprintk("Invalid port width 0x%04x\n", bit_width); - retval = ret; - goto migrate_end; + if (acpi_pstate_strict) { + /* Then we read the 'status_register' and compare the value + * with the target state's 'status' to make sure the + * transition was successful. + * Note that we'll poll for up to 1ms (100 cycles of 10us) + * before giving up. + */ + + port = data->acpi_data.status_register.address; + bit_width = data->acpi_data.status_register.bit_width; + + dprintk("Looking for 0x%08x from port 0x%04x\n", + (u32) data->acpi_data.states[state].status, port); + + for (i=0; i<100; i++) { + ret = acpi_processor_read_port(port, bit_width, &value); + if (ret) { + dprintk("Invalid port width 0x%04x\n", bit_width); + retval = ret; + goto migrate_end; + } + if (value == (u32) data->acpi_data.states[state].status) + break; + udelay(10); } - if (value == (u32) data->acpi_data.states[state].status) - break; - udelay(10); + } else { + i = 0; + value = (u32) data->acpi_data.states[state].status; } /* notify cpufreq */ cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); - if (value != (u32) data->acpi_data.states[state].status) { + if (unlikely(value != (u32) data->acpi_data.states[state].status)) { unsigned int tmp = cpufreq_freqs.new; cpufreq_freqs.new = cpufreq_freqs.old; cpufreq_freqs.old = tmp; @@ -537,6 +550,8 @@ acpi_cpufreq_exit (void) return; } +module_param(acpi_pstate_strict, uint, 0644); +MODULE_PARM_DESC(acpi_pstate_strict, "value 0 or non-zero. non-zero -> strict ACPI checks are performed during frequency changes."); late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 04e3563da4f..8ef38544453 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -64,8 +64,6 @@ static int dont_scale_voltage; #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) -#define __hlt() __asm__ __volatile__("hlt": : :"memory") - /* Clock ratios multiplied by 10 */ static int clock_ratio[32]; static int eblcr_table[32]; @@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul, outb(0xFE,0x21); /* TMR0 only */ outb(0xFF,0x80); /* delay */ - local_irq_enable(); - - __hlt(); + safe_halt(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - __hlt(); + halt(); local_irq_disable(); @@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) bcr2.bits.CLOCKMUL = clock_ratio_index; local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); - - __hlt(); + safe_halt(); /* Disable software clock multiplier */ rdmsrl (MSR_VIA_BCR2, bcr2.val); @@ -473,11 +467,11 @@ static void __init longhaul_setup_voltagescaling(void) } if (vrmrev==0) { - dprintk ("VRM 8.5 \n"); + dprintk ("VRM 8.5\n"); memcpy (voltage_table, vrm85scales, sizeof(voltage_table)); numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25; } else { - dprintk ("Mobile VRM \n"); + dprintk ("Mobile VRM\n"); memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table)); numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5; } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 327a55d4d1c..c397b622043 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -259,7 +259,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->op_points == NULL) { /* Matched a non-match */ - dprintk(KERN_INFO PFX "no table support for CPU model \"%s\": \n", + dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n", cpu->x86_model_id); #ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); @@ -402,7 +402,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) for (i=0; i<p.state_count; i++) { if (p.states[i].control != p.states[i].status) { - dprintk("Different control (%x) and status values (%x)\n", + dprintk("Different control (%llu) and status values (%llu)\n", p.states[i].control, p.states[i].status); result = -EINVAL; goto err_unreg; @@ -415,7 +415,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } if (p.states[i].core_frequency > p.states[0].core_frequency) { - dprintk("P%u has larger frequency (%u) than P0 (%u), skipping\n", i, + dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i, p.states[i].core_frequency, p.states[0].core_frequency); p.states[i].core_frequency = 0; continue; @@ -498,13 +498,6 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; - - if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; - if (is_const_loops_cpu(policy->cpu)) { centrino_driver.flags |= CPUFREQ_CONST_LOOPS; } @@ -513,6 +506,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (policy->cpu != 0) return -ENODEV; + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; + + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; + if (!centrino_cpu[policy->cpu]) { dprintk(KERN_INFO PFX "found unsupported CPU with " "Enhanced SpeedStep: send /proc/cpuinfo to " diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index b25fb6b635a..2718fb6f6ab 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -99,7 +99,7 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) u32 function = GET_SPEEDSTEP_FREQS; if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n", result); + dprintk("bug #1422 -- can't read freqs from BIOS\n"); return -ENODEV; } diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index ba4b01138c8..ff87cc22b32 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ cr0 = 0x20000000; - __asm__("movl %%cr0,%%eax\n\t" - "orl %0,%%eax\n\t" - "movl %%eax,%%cr0\n" - : : "r" (cr0) - :"ax"); + write_cr0(read_cr0() | cr0); /* CCR2 bit 2: lock NW bit and set WT1 */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index a2c33c1a46c..43601de0f63 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -82,16 +82,13 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) */ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) { - unsigned int eax; + unsigned int eax, ebx, ecx, edx; if (c->cpuid_level < 4) return 1; - __asm__("cpuid" - : "=a" (eax) - : "0" (4), "c" (0) - : "bx", "dx"); - + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); if (eax & 0x1f) return ((eax >> 26) + 1); else diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 6c55b50cf04..9e0d5f83cb9 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -305,6 +305,9 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf; unsigned long num_threads_sharing; +#ifdef CONFIG_X86_HT + struct cpuinfo_x86 *c = cpu_data + cpu; +#endif this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; @@ -314,10 +317,12 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) #ifdef CONFIG_X86_HT else if (num_threads_sharing == smp_num_siblings) this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; -#endif + else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) + this_leaf->shared_cpu_map = cpu_core_map[cpu]; else - printk(KERN_INFO "Number of CPUs sharing cache didn't match " + printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " "any known set of CPUs\n"); +#endif } #else static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 764cac64e21..dd4ebd6af7e 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -561,7 +561,7 @@ struct mtrr_value { static struct mtrr_value * mtrr_state; -static int mtrr_save(struct sys_device * sysdev, u32 state) +static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { int i; int size = num_var_ranges * sizeof(struct mtrr_value); diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index e5fab12f792..913be77bb84 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -153,7 +153,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ - __asm__("hlt"); + halt(); for(;;); return 1; diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index a3cdf894302..58516e2ac17 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -6,32 +6,28 @@ #include <linux/bootmem.h> -struct dmi_header { - u8 type; - u8 length; - u16 handle; -}; - -#undef DMI_DEBUG - -#ifdef DMI_DEBUG -#define dmi_printk(x) printk x -#else -#define dmi_printk(x) -#endif - static char * __init dmi_string(struct dmi_header *dm, u8 s) { u8 *bp = ((u8 *) dm) + dm->length; + char *str = ""; - if (!s) - return ""; - s--; - while (s > 0 && *bp) { - bp += strlen(bp) + 1; + if (s) { s--; - } - return bp; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; + s--; + } + + if (*bp != 0) { + str = alloc_bootmem(strlen(bp) + 1); + if (str != NULL) + strcpy(str, bp); + else + printk(KERN_ERR "dmi_string: out of memory.\n"); + } + } + + return str; } /* @@ -84,69 +80,76 @@ static int __init dmi_checksum(u8 *buf) return sum == 0; } -static int __init dmi_iterate(void (*decode)(struct dmi_header *)) +static char *dmi_ident[DMI_STRING_MAX]; +static LIST_HEAD(dmi_devices); + +/* + * Save a DMI string + */ +static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) { - u8 buf[15]; - char __iomem *p, *q; + char *p, *d = (char*) dm; - /* - * no iounmap() for that ioremap(); it would be a no-op, but it's - * so early in setup that sucker gets confused into doing what - * it shouldn't if we actually call it. - */ - p = ioremap(0xF0000, 0x10000); + if (dmi_ident[slot]) + return; + + p = dmi_string(dm, d[string]); if (p == NULL) - return -1; + return; - for (q = p; q < p + 0x10000; q += 16) { - memcpy_fromio(buf, q, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; + dmi_ident[slot] = p; +} - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); +static void __init dmi_save_devices(struct dmi_header *dm) +{ + int i, count = (dm->length - sizeof(struct dmi_header)) / 2; + struct dmi_device *dev; + + for (i = 0; i < count; i++) { + char *d = ((char *) dm) + (i * 2); - dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", - num, len)); - dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base)); + /* Skip disabled device */ + if ((*d & 0x80) == 0) + continue; - if (dmi_table(base,len, num, decode) == 0) - return 0; + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_devices: out of memory.\n"); + break; } + + dev->type = *d++ & 0x7f; + dev->name = dmi_string(dm, *d); + dev->device_data = NULL; + + list_add(&dev->list, &dmi_devices); } - return -1; } -static char *dmi_ident[DMI_STRING_MAX]; - -/* - * Save a DMI string - */ -static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) +static void __init dmi_save_ipmi_device(struct dmi_header *dm) { - char *d = (char*)dm; - char *p = dmi_string(dm, d[string]); + struct dmi_device *dev; + void * data; - if (p == NULL || *p == 0) + data = alloc_bootmem(dm->length); + if (data == NULL) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; - if (dmi_ident[slot]) + } + + memcpy(data, dm, dm->length); + + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; + } - dmi_ident[slot] = alloc_bootmem(strlen(p) + 1); - if(dmi_ident[slot]) - strcpy(dmi_ident[slot], p); - else - printk(KERN_ERR "dmi_save_ident: out of memory.\n"); + dev->type = DMI_DEV_TYPE_IPMI; + dev->name = "IPMI controller"; + dev->device_data = data; + + list_add(&dev->list, &dmi_devices); } /* @@ -156,42 +159,69 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) */ static void __init dmi_decode(struct dmi_header *dm) { - u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) { - case 0: - dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4]))); + case 0: /* BIOS Information */ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8]))); dmi_save_ident(dm, DMI_BIOS_DATE, 8); break; - case 1: - dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4]))); + case 1: /* System Information */ dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_printk(("Version: %s\n", dmi_string(dm, data[6]))); dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); break; - case 2: - dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4]))); + case 2: /* Base Board Information */ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6]))); dmi_save_ident(dm, DMI_BOARD_VERSION, 6); break; + case 10: /* Onboard Devices Information */ + dmi_save_devices(dm); + break; + case 38: /* IPMI Device Information */ + dmi_save_ipmi_device(dm); } } void __init dmi_scan_machine(void) { - if (dmi_iterate(dmi_decode)) - printk(KERN_INFO "DMI not present.\n"); + u8 buf[15]; + char __iomem *p, *q; + + /* + * no iounmap() for that ioremap(); it would be a no-op, but it's + * so early in setup that sucker gets confused into doing what + * it shouldn't if we actually call it. + */ + p = ioremap(0xF0000, 0x10000); + if (p == NULL) + goto out; + + for (q = p; q < p + 0x10000; q += 16) { + memcpy_fromio(buf, q, 15); + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; + + /* + * DMI version 0.0 means that the real version is taken from + * the SMBIOS version, which we don't know at this point. + */ + if (buf[14] != 0) + printk(KERN_INFO "DMI %d.%d present.\n", + buf[14] >> 4, buf[14] & 0xF); + else + printk(KERN_INFO "DMI present.\n"); + + if (dmi_table(base,len, num, dmi_decode) == 0) + return; + } + } + +out: printk(KERN_INFO "DMI not present.\n"); } @@ -218,9 +248,9 @@ int dmi_check_system(struct dmi_system_id *list) /* No match */ goto fail; } + count++; if (d->callback && d->callback(d)) break; - count++; fail: d++; } @@ -240,3 +270,32 @@ char *dmi_get_system_info(int field) return dmi_ident[field]; } EXPORT_SYMBOL(dmi_get_system_info); + +/** + * dmi_find_device - find onboard device by type/name + * @type: device type or %DMI_DEV_TYPE_ANY to match all device types + * @desc: device name string or %NULL to match all + * @from: previous device found in search, or %NULL for new search. + * + * Iterates through the list of known onboard devices. If a device is + * found with a matching @vendor and @device, a pointer to its device + * structure is returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * If @from is not %NULL, searches continue from next device. + */ +struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) +{ + struct list_head *d, *head = from ? &from->list : &dmi_devices; + + for(d = head->next; d != &dmi_devices; d = d->next) { + struct dmi_device *dev = list_entry(d, struct dmi_device, list); + + if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) && + ((name == NULL) || (strcmp(dev->name, name) == 0))) + return dev; + } + + return NULL; +} +EXPORT_SYMBOL(dmi_find_device); diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c index 789af3e9fb1..5edb1d379ad 100644 --- a/arch/i386/kernel/doublefault.c +++ b/arch/i386/kernel/doublefault.c @@ -20,7 +20,7 @@ static void doublefault_fn(void) struct Xgt_desc_struct gdt_desc = {0, 0}; unsigned long gdt, tss; - __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); + store_gdt(&gdt_desc); gdt = gdt_desc.address; printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 385883ea8c1..ecad519fd39 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void) * directory. If I have PSE, I just need to duplicate one entry in * page directory. */ - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); + cr4 = read_cr4(); if (cr4 & X86_CR4_PSE) { efi_bak_pg_dir_pointer[0].pgd = @@ -104,8 +104,7 @@ static void efi_call_phys_prelog(void) local_flush_tlb(); cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); - __asm__ __volatile__("lgdt %0":"=m" - (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); + load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); } static void efi_call_phys_epilog(void) @@ -114,8 +113,8 @@ static void efi_call_phys_epilog(void) cpu_gdt_descr[0].address = (unsigned long) __va(cpu_gdt_descr[0].address); - __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); + load_gdt(&cpu_gdt_descr[0]); + cr4 = read_cr4(); if (cr4 & X86_CR4_PSE) { swapper_pg_dir[pgd_index(0)].pgd = @@ -233,22 +232,23 @@ void __init efi_map_memmap(void) { memmap.map = NULL; - memmap.map = (efi_memory_desc_t *) - bt_ioremap((unsigned long) memmap.phys_map, - (memmap.nr_map * sizeof(efi_memory_desc_t))); - + memmap.map = bt_ioremap((unsigned long) memmap.phys_map, + (memmap.nr_map * memmap.desc_size)); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); + + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); } #if EFI_DEBUG static void __init print_efi_memmap(void) { efi_memory_desc_t *md; + void *p; int i; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " "range=[0x%016llx-0x%016llx) (%lluMB)\n", i, md->type, md->attribute, md->phys_addr, @@ -271,10 +271,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) } prev, curr; efi_memory_desc_t *md; unsigned long start, end; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->num_pages == 0) || (!is_available_memory(md))) continue; @@ -325,6 +325,7 @@ void __init efi_init(void) memmap.phys_map = EFI_MEMMAP; memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; memmap.desc_version = EFI_MEMDESC_VERSION; + memmap.desc_size = EFI_MEMDESC_SIZE; efi.systab = (efi_system_table_t *) boot_ioremap((unsigned long) efi_phys.systab, @@ -428,22 +429,30 @@ void __init efi_init(void) printk(KERN_ERR PFX "Could not map the runtime service table!\n"); /* Map the EFI memory map for use until paging_init() */ - - memmap.map = (efi_memory_desc_t *) - boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); - + memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); - if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { - printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " - "match the one from EFI!\n"); - } + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + #if EFI_DEBUG print_efi_memmap(); #endif } +static inline void __init check_range_for_systab(efi_memory_desc_t *md) +{ + if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && + ((unsigned long)efi_phys.systab < md->phys_addr + + ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { + unsigned long addr; + + addr = md->virt_addr - md->phys_addr + + (unsigned long)efi_phys.systab; + efi.systab = (efi_system_table_t *)addr; + } +} + /* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that @@ -457,43 +466,32 @@ void __init efi_enter_virtual_mode(void) { efi_memory_desc_t *md; efi_status_t status; - int i; + void *p; efi.systab = NULL; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; - if (md->attribute & EFI_MEMORY_RUNTIME) { - md->virt_addr = - (unsigned long)ioremap(md->phys_addr, - md->num_pages << EFI_PAGE_SHIFT); - if (!(unsigned long)md->virt_addr) { - printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", - (unsigned long)md->phys_addr); - } + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; - if (((unsigned long)md->phys_addr <= - (unsigned long)efi_phys.systab) && - ((unsigned long)efi_phys.systab < - md->phys_addr + - ((unsigned long)md->num_pages << - EFI_PAGE_SHIFT))) { - unsigned long addr; - - addr = md->virt_addr - md->phys_addr + - (unsigned long)efi_phys.systab; - efi.systab = (efi_system_table_t *)addr; - } + md->virt_addr = (unsigned long)ioremap(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT); + if (!(unsigned long)md->virt_addr) { + printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", + (unsigned long)md->phys_addr); } + /* update the virtual address of the EFI system table */ + check_range_for_systab(md); } if (!efi.systab) BUG(); status = phys_efi_set_virtual_address_map( - sizeof(efi_memory_desc_t) * memmap.nr_map, - sizeof(efi_memory_desc_t), + memmap.desc_size * memmap.nr_map, + memmap.desc_size, memmap.desc_version, memmap.phys_map); @@ -533,10 +531,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, { struct resource *res; efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) @@ -613,10 +611,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, u32 efi_mem_type(unsigned long phys_addr) { efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) return md->type; @@ -627,10 +625,10 @@ u32 efi_mem_type(unsigned long phys_addr) u64 efi_mem_attributes(unsigned long phys_addr) { efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) return md->attribute; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index a991d4e5edd..3aad0383966 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -203,7 +203,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -226,9 +226,9 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) - # system call tracing in operation + # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -338,6 +338,9 @@ syscall_trace_entry: movl %esp, %eax xorl %edx,%edx call do_syscall_trace + cmpl $0, %eax + jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, + # so must skip actual syscall movl ORIG_EAX(%esp), %eax cmpl $(nr_syscalls), %eax jnae syscall_call @@ -504,7 +507,7 @@ label: \ pushl $__KERNEL_CS; \ pushl $sysenter_past_esp -ENTRY(debug) +KPROBE_ENTRY(debug) cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) @@ -515,7 +518,7 @@ debug_stack_correct: movl %esp,%eax # pt_regs pointer call do_debug jmp ret_from_exception - + .previous .text /* * NMI is doubly nasty. It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -588,13 +591,14 @@ nmi_16bit_stack: .long 1b,iret_exc .previous -ENTRY(int3) +KPROBE_ENTRY(int3) pushl $-1 # mark this as an int SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 jmp ret_from_exception + .previous .text ENTRY(overflow) pushl $0 @@ -628,17 +632,19 @@ ENTRY(stack_segment) pushl $do_stack_segment jmp error_code -ENTRY(general_protection) +KPROBE_ENTRY(general_protection) pushl $do_general_protection jmp error_code + .previous .text ENTRY(alignment_check) pushl $do_alignment_check jmp error_code -ENTRY(page_fault) +KPROBE_ENTRY(page_fault) pushl $do_page_fault jmp error_code + .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 4477bb10709..e437fb36749 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -17,7 +17,7 @@ #include <asm/desc.h> #include <asm/cache.h> #include <asm/thread_info.h> -#include <asm/asm_offsets.h> +#include <asm/asm-offsets.h> #include <asm/setup.h> /* @@ -77,6 +77,32 @@ ENTRY(startup_32) subl %edi,%ecx shrl $2,%ecx rep ; stosl +/* + * Copy bootup parameters out of the way. + * Note: %esi still has the pointer to the real-mode data. + * With the kexec as boot loader, parameter segment might be loaded beyond + * kernel image and might not even be addressable by early boot page tables. + * (kexec on panic case). Hence copy out the parameters before initializing + * page tables. + */ + movl $(boot_params - __PAGE_OFFSET),%edi + movl $(PARAM_SIZE/4),%ecx + cld + rep + movsl + movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi + andl %esi,%esi + jnz 2f # New command line protocol + cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR + jne 1f + movzwl OLD_CL_OFFSET,%esi + addl $(OLD_CL_BASE_ADDR),%esi +2: + movl $(saved_command_line - __PAGE_OFFSET),%edi + movl $(COMMAND_LINE_SIZE/4),%ecx + rep + movsl +1: /* * Initialize page tables. This creates a PDE and a set of page @@ -214,28 +240,6 @@ ENTRY(startup_32_smp) */ call setup_idt -/* - * Copy bootup parameters out of the way. - * Note: %esi still has the pointer to the real-mode data. - */ - movl $boot_params,%edi - movl $(PARAM_SIZE/4),%ecx - cld - rep - movsl - movl boot_params+NEW_CL_POINTER,%esi - andl %esi,%esi - jnz 2f # New command line protocol - cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR - jne 1f - movzwl OLD_CL_OFFSET,%esi - addl $(OLD_CL_BASE_ADDR),%esi -2: - movl $saved_command_line,%edi - movl $(COMMAND_LINE_SIZE/4),%ecx - rep - movsl -1: checkCPUtype: movl $-1,X86_CPUID # -1 for no CPUID initially diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c new file mode 100644 index 00000000000..c36d1c006c2 --- /dev/null +++ b/arch/i386/kernel/i8237.c @@ -0,0 +1,67 @@ +/* + * i8237.c: 8237A DMA controller suspend functions. + * + * Written by Pierre Ossman, 2005. + */ + +#include <linux/init.h> +#include <linux/sysdev.h> + +#include <asm/dma.h> + +/* + * This module just handles suspend/resume issues with the + * 8237A DMA controller (used for ISA and LPC). + * Allocation is handled in kernel/dma.c and normal usage is + * in asm/dma.h. + */ + +static int i8237A_resume(struct sys_device *dev) +{ + unsigned long flags; + int i; + + flags = claim_dma_lock(); + + dma_outb(DMA1_RESET_REG, 0); + dma_outb(DMA2_RESET_REG, 0); + + for (i = 0;i < 8;i++) { + set_dma_addr(i, 0x000000); + /* DMA count is a bit weird so this is not 0 */ + set_dma_count(i, 1); + } + + /* Enable cascade DMA or channel 0-3 won't work */ + enable_dma(4); + + release_dma_lock(flags); + + return 0; +} + +static int i8237A_suspend(struct sys_device *dev, pm_message_t state) +{ + return 0; +} + +static struct sysdev_class i8237_sysdev_class = { + set_kset_name("i8237"), + .suspend = i8237A_suspend, + .resume = i8237A_resume, +}; + +static struct sys_device device_i8237A = { + .id = 0, + .cls = &i8237_sysdev_class, +}; + +static int __init i8237A_init_sysfs(void) +{ + int error = sysdev_class_register(&i8237_sysdev_class); + if (!error) + error = sysdev_register(&device_i8237A); + return error; +} + +device_initcall(i8237A_init_sysfs); diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 6578f40bd50..1efdc76ae96 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -33,6 +33,7 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/sysdev.h> + #include <asm/io.h> #include <asm/smp.h> #include <asm/desc.h> @@ -77,7 +78,7 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -222,13 +223,21 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { unsigned long flags; int pin; struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; + cpumask_t tmp; + cpus_and(tmp, cpumask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(cpumask, tmp, CPU_MASK_ALL); + apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ apicid_value = apicid_value << 24; @@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } + set_irq_info(irq, cpumask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # define Dprintk(x...) # endif -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; #define IRQBALANCE_CHECK_ARCH -999 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; @@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq) cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(irq, cpumask_of_cpu(new_cpu)); } } @@ -528,16 +532,12 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - irq_desc_t *desc = irq_desc + selected_irq; - unsigned long flags; Dprintk("irq = %d moved to cpu = %d\n", selected_irq, min_loaded); /* mark for change destination */ - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[selected_irq] = - cpumask_of_cpu(min_loaded); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); + /* Since we made a change, come back sooner to * check for more variation. */ @@ -568,7 +568,8 @@ static int balanced_irq(void *unused) /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < NR_IRQS ; i++) { - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); + pending_irq_cpumask[i] = cpumask_of_cpu(0); + set_pending_irq(i, cpumask_of_cpu(0)); } for ( ; ; ) { @@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str) __setup("noirqbalance", irqbalance_disable); -static inline void move_irq(int irq) -{ - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } -} - late_initcall(balanced_irq_init); - -#else /* !CONFIG_IRQBALANCE */ -static inline void move_irq(int irq) { } #endif /* CONFIG_IRQBALANCE */ +#endif /* CONFIG_SMP */ #ifndef CONFIG_SMP void fastcall send_IPI_self(int vector) @@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void) } } +#endif /* * EISA Edge/Level control register, ELCR @@ -1127,7 +1119,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1641,9 +1634,9 @@ void disable_IO_APIC(void) clear_IO_APIC(); /* - * If the i82559 is routed through an IOAPIC + * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode - * so legacy interrups can be delivered. + * so legacy interrupts can be delivered. */ pin = find_isa_irq_pin(0, mp_ExtINT); if (pin != -1) { @@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); end_level_ioapic_irq(irq); } @@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } #endif +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1992,7 +1990,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * edge-triggered handler, without risking IRQ storms and other ugly * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -2000,10 +1998,12 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -2074,7 +2076,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ @@ -2421,7 +2423,7 @@ device_initcall(ioapic_init_sysfs); ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI int __init io_apic_get_unique_id (int ioapic, int apic_id) { @@ -2569,9 +2571,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index 8b25160393c..f2b37654777 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused) volatile struct pt_regs * regs = (struct pt_regs *) &unused; unsigned int level = regs->ebx; unsigned int old = (regs->eflags >> 12) & 3; + struct thread_struct *t = ¤t->thread; if (level > 3) return -EINVAL; @@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused) if (!capable(CAP_SYS_RAWIO)) return -EPERM; } - regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); - /* Make sure we return the long way (not sysenter) */ - set_thread_flag(TIF_IRET); + t->iopl = level << 12; + regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; + set_iopl_mask(t->iopl); return 0; } diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index a6d8c45961d..6345b430b10 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -62,32 +62,32 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) return 0; } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { return 0; } -void arch_copy_kprobe(struct kprobe *p) +void __kprobes arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { } @@ -127,7 +127,8 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { unsigned long *sara = (unsigned long *)®s->esp; struct kretprobe_instance *ri; @@ -150,7 +151,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. */ -static int kprobe_handler(struct pt_regs *regs) +static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -176,7 +177,8 @@ static int kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if (kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kprobe_saved_eflags; unlock_kprobes(); @@ -220,7 +222,10 @@ static int kprobe_handler(struct pt_regs *regs) * either a probepoint or a debugger breakpoint * at this address. In either case, no further * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. */ + regs->eip -= sizeof(kprobe_opcode_t); ret = 1; } /* Not one of ours: let kernel handle it */ @@ -259,7 +264,7 @@ no_kprobe: /* * Called when we hit the probe point at kretprobe_trampoline */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -338,7 +343,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long *tos = (unsigned long *)®s->esp; unsigned long next_eip = 0; @@ -444,8 +449,8 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine to for handling exceptions. */ -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch (val) { @@ -473,7 +478,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -495,7 +500,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void jprobe_return(void) +void __kprobes jprobe_return(void) { preempt_enable_no_resched(); asm volatile (" xchgl %%ebx,%%esp \n" @@ -506,7 +511,7 @@ void jprobe_return(void) (jprobe_saved_esp):"memory"); } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { u8 *addr = (u8 *) (regs->eip - 1); unsigned long stack_addr = (unsigned long)jprobe_saved_esp; diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index bb50afbee92..fe1ffa55587 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -177,7 +177,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) { struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2, *lp; + __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -205,8 +205,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { if (oldmode || LDT_empty(&ldt_info)) { @@ -223,8 +221,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) /* Install the new entry ... */ install: - *lp = entry_1; - *(lp+1) = entry_2; + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); error = 0; out_unlock: diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index cb699a2aa1f..a912fed4848 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -17,13 +17,7 @@ #include <asm/apic.h> #include <asm/cpufeature.h> #include <asm/desc.h> - -static inline unsigned long read_cr3(void) -{ - unsigned long cr3; - asm volatile("movl %%cr3,%0": "=r"(cr3)); - return cr3; -} +#include <asm/system.h> #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) @@ -99,10 +93,7 @@ static void set_idt(void *newidt, __u16 limit) curidt.size = limit; curidt.address = (unsigned long)newidt; - __asm__ __volatile__ ( - "lidtl %0\n" - : : "m" (curidt) - ); + load_idt(&curidt); }; @@ -114,10 +105,7 @@ static void set_gdt(void *newgdt, __u16 limit) curgdt.size = limit; curgdt.address = (unsigned long)newgdt; - __asm__ __volatile__ ( - "lgdtl %0\n" - : : "m" (curgdt) - ); + load_gdt(&curgdt); }; static void load_segments(void) diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index a77c612aad0..165f13158c6 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -164,7 +164,8 @@ static void collect_cpu_info (void *unused) } wrmsr(MSR_IA32_UCODE_REV, 0, 0); - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); + /* see notes above for revision 1.07. Apparent chip bug */ + serialize_cpu(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", @@ -377,7 +378,9 @@ static void do_update_one (void * unused) (unsigned long) uci->mc->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); + /* see notes above for revision 1.07. Apparent chip bug */ + serialize_cpu(); + /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index ce838abb27d..15949fd0810 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -65,6 +65,8 @@ int nr_ioapics; int pic_mode; unsigned long mp_lapic_addr; +unsigned int def_to_bigsmp = 0; + /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ @@ -121,7 +123,7 @@ static int MP_valid_apicid(int apicid, int version) static void __init MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; - physid_mask_t tmp; + physid_mask_t phys_cpu; if (!(m->mpc_cpuflag & CPU_ENABLED)) return; @@ -192,27 +194,36 @@ static void __init MP_processor_info (struct mpc_config_processor *m) " Processor ignored.\n", maxcpus); return; } - num_processors++; ver = m->mpc_apicver; if (!MP_valid_apicid(apicid, ver)) { printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n", m->mpc_apicid, MAX_APICS); - --num_processors; return; } - tmp = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp); - + cpu_set(num_processors, cpu_possible_map); + num_processors++; + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + /* * Validate version */ if (ver == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + m->mpc_apicid); ver = 0x10; } apic_version[m->mpc_apicid] = ver; + if ((num_processors > 8) && + APIC_XAPIC(ver) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + def_to_bigsmp = 1; + else + def_to_bigsmp = 0; + bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } @@ -653,8 +664,6 @@ void __init get_smp_config (void) struct intel_mp_floating *mpf = mpf_found; /* - * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. */ @@ -810,7 +819,7 @@ void __init find_smp_config (void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI void __init mp_register_lapic_address ( u64 address) @@ -856,7 +865,7 @@ void __init mp_register_lapic ( MP_processor_info(&processor); } -#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT)) +#ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 @@ -1071,11 +1080,9 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) */ static int gsi_to_irq[MAX_GSI_NUM]; -#ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ if (acpi_fadt.sci_int == gsi) return gsi; -#endif ioapic = mp_find_ioapic(gsi); if (ioapic < 0) { @@ -1118,13 +1125,11 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) if (gsi < MAX_GSI_NUM) { if (gsi > 15) gsi = pci_irq++; -#ifdef CONFIG_ACPI_BUS /* * Don't assign IRQ used by ACPI SCI */ if (gsi == acpi_fadt.sci_int) gsi = pci_irq++; -#endif gsi_to_irq[irq] = gsi; } else { printk(KERN_ERR "GSI %u is too high\n", gsi); @@ -1138,5 +1143,5 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) return gsi; } -#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_X86_IO_APIC */ +#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index b2f03c39a6f..03100d6fc5d 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -46,23 +46,13 @@ static struct class *msr_class; -/* Note: "err" is handled in a funny way below. Otherwise one version - of gcc or another breaks. */ - static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx) { int err; - asm volatile ("1: wrmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err) - :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0)); - + err = wrmsr_safe(reg, eax, edx); + if (err) + err = -EIO; return err; } @@ -70,18 +60,9 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) { int err; - asm volatile ("1: rdmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,3b\n" - ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx) - :"c"(reg), "i"(-EIO), "0"(0)); - + err = rdmsr_safe(reg, eax, edx); + if (err) + err = -EIO; return err; } diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 8c242bb1ef4..0178457db72 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -478,6 +478,11 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } extern void die_nmi(struct pt_regs *, const char *msg); @@ -501,8 +506,11 @@ void nmi_watchdog_tick (struct pt_regs * regs) */ alert_counter[cpu]++; if (alert_counter[cpu] == 5*nmi_hz) + /* + * die_nmi will return ONLY if NOTIFY_STOP happens.. + */ die_nmi(regs, "NMI Watchdog detected LOCKUP"); - } else { + last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index e3f362e8af5..b45cbf93d43 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -164,7 +164,7 @@ static inline void play_dead(void) */ local_irq_disable(); while (1) - __asm__ __volatile__("hlt":::"memory"); + halt(); } #else static inline void play_dead(void) @@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs) printk(" DS: %04x ES: %04x\n", 0xffff & regs->xds,0xffff & regs->xes); - __asm__("movl %%cr0, %0": "=r" (cr0)); - __asm__("movl %%cr2, %0": "=r" (cr2)); - __asm__("movl %%cr3, %0": "=r" (cr3)); - /* This could fault if %cr4 does not exist */ - __asm__("1: movl %%cr4, %0 \n" - "2: \n" - ".section __ex_table,\"a\" \n" - ".long 1b,2b \n" - ".previous \n" - : "=r" (cr4): "0" (0)); + cr0 = read_cr0(); + cr2 = read_cr2(); + cr3 = read_cr3(); + if (current_cpu_data.x86 > 4) { + cr4 = read_cr4(); + } printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); show_trace(NULL, ®s->esp); } @@ -682,21 +678,26 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas __unlazy_fpu(prev_p); /* - * Reload esp0, LDT and the page table pointer: + * Reload esp0. */ load_esp0(tss, next); /* - * Load the per-thread Thread-Local Storage descriptor. + * Save away %fs and %gs. No need to save %es and %ds, as + * those are always kernel segments while inside the kernel. + * Doing this before setting the new TLS descriptors avoids + * the situation where we temporarily have non-reloadable + * segments in %fs and %gs. This could be an issue if the + * NMI handler ever used %fs or %gs (it does not today), or + * if the kernel is running inside of a hypervisor layer. */ - load_TLS(next, cpu); + savesegment(fs, prev->fs); + savesegment(gs, prev->gs); /* - * Save away %fs and %gs. No need to save %es and %ds, as - * those are always kernel segments while inside the kernel. + * Load the per-thread Thread-Local Storage descriptor. */ - asm volatile("mov %%fs,%0":"=m" (prev->fs)); - asm volatile("mov %%gs,%0":"=m" (prev->gs)); + load_TLS(next, cpu); /* * Restore %fs and %gs if needed. @@ -711,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas loadsegment(gs, next->gs); /* + * Restore IOPL if needed. + */ + if (unlikely(prev->iopl != next->iopl)) + set_iopl_mask(next->iopl); + + /* * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 0da59b42843..7b6368bf897 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -271,6 +271,8 @@ static void clear_singlestep(struct task_struct *child) void ptrace_disable(struct task_struct *child) { clear_singlestep(child); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } /* @@ -509,15 +511,20 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) } break; + case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ ret = -EIO; if (!valid_signal(data)) break; - if (request == PTRACE_SYSCALL) { + if (request == PTRACE_SYSEMU) { + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + } else if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - else { + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + } else { + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } child->exit_code = data; @@ -542,10 +549,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) wake_up_process(child); break; + case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ case PTRACE_SINGLESTEP: /* set the trap flag. */ ret = -EIO; if (!valid_signal(data)) break; + + if (request == PTRACE_SYSEMU_SINGLESTEP) + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_singlestep(child); child->exit_code = data; @@ -678,27 +692,58 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) * - triggered by current->work.syscall_trace */ __attribute__((regparm(3))) -void do_syscall_trace(struct pt_regs *regs, int entryexit) +int do_syscall_trace(struct pt_regs *regs, int entryexit) { - /* do the secure computing check first */ - secure_computing(regs->orig_eax); + int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); + /* + * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall + * interception + */ + int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); + int ret = 0; - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); + /* do the secure computing check first */ + if (!entryexit) + secure_computing(regs->orig_eax); + + if (unlikely(current->audit_context)) { + if (entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), + regs->eax); + /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only + * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is + * not used, entry.S will call us only on syscall exit, not + * entry; so when TIF_SYSCALL_AUDIT is used we must avoid + * calling send_sigtrap() on syscall entry. + * + * Note that when PTRACE_SYSEMU_SINGLESTEP is used, + * is_singlestep is false, despite his name, so we will still do + * the correct thing. + */ + else if (is_singlestep) + goto out; + } if (!(current->ptrace & PT_PTRACED)) goto out; + /* If a process stops on the 1st tracepoint with SYSCALL_TRACE + * and then is resumed with SYSEMU_SINGLESTEP, it will come in + * here. We have to check this and return */ + if (is_sysemu && entryexit) + return 0; + /* Fake a debug trap */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (is_singlestep) send_sigtrap(current, regs, 0); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) + if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) goto out; /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + /* Note that the debugger could change the result of test_thread_flag!*/ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); /* * this isn't the same as continuing with a signal, but it will do @@ -709,9 +754,17 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } - out: + ret = is_sysemu; +out: if (unlikely(current->audit_context) && !entryexit) audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, regs->ebx, regs->ecx, regs->edx, regs->esi); - + if (ret == 0) + return 0; + + regs->orig_eax = -1; /* force skip of syscall restarting */ + if (unlikely(current->audit_context)) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), + regs->eax); + return 1; } diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index c71fef31dc4..1cbb9c0f470 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -13,6 +13,7 @@ #include <linux/dmi.h> #include <asm/uaccess.h> #include <asm/apic.h> +#include <asm/desc.h> #include "mach_reboot.h" #include <linux/reboot_fixups.h> @@ -242,13 +243,13 @@ void machine_real_restart(unsigned char *code, int length) /* Set up the IDT for real mode. */ - __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt)); + load_idt(&real_mode_idt); /* Set up a GDT from which we can load segment descriptors for real mode. The GDT is not used in real mode; it is just needed here to prepare the descriptors. */ - __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt)); + load_gdt(&real_mode_gdt); /* Load the data segment registers, and thus the descriptors ready for real mode. The base address of each segment is 0x100, 16 times the @@ -316,7 +317,7 @@ void machine_emergency_restart(void) if (!reboot_thru_bios) { if (efi_enabled) { efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - __asm__ __volatile__("lidt %0": :"m" (no_idt)); + load_idt(&no_idt); __asm__ __volatile__("int3"); } /* rebooting needs to touch the page at absolute addr 0 */ @@ -325,7 +326,7 @@ void machine_emergency_restart(void) mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); /* That didn't work - force a triple fault.. */ - __asm__ __volatile__("lidt %0": :"m" (no_idt)); + load_idt(&no_idt); __asm__ __volatile__("int3"); } } diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index 469f496e55c..7455ab64394 100644 --- a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -13,171 +13,9 @@ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> */ #include <linux/config.h> -#include <linux/sched.h> -#include <linux/err.h> -#include <linux/init.h> #include <asm/semaphore.h> /* - * Semaphores are implemented using a two-way counter: - * The "count" variable is decremented for each process - * that tries to acquire the semaphore, while the "sleeping" - * variable is a count of such acquires. - * - * Notably, the inline "up()" and "down()" functions can - * efficiently test if they need to do any extra work (up - * needs to do something only if count was negative before - * the increment operation. - * - * "sleeping" and the contention routine ordering is protected - * by the spinlock in the semaphore's waitqueue head. - * - * Note that these functions are only called when there is - * contention on the lock, and as such all this is the - * "non-critical" part of the whole semaphore business. The - * critical part is the inline stuff in <asm/semaphore.h> - * where we want to avoid any extra jumps and calls. - */ - -/* - * Logic: - * - only on a boundary condition do we need to care. When we go - * from a negative count to a non-negative, we wake people up. - * - when we go from a non-negative count to a negative do we - * (a) synchronize with the "sleeper" count and (b) make sure - * that we're on the wakeup list before we synchronize so that - * we cannot lose wakeup events. - */ - -static fastcall void __attribute_used__ __up(struct semaphore *sem) -{ - wake_up(&sem->wait); -} - -static fastcall void __attribute_used__ __sched __down(struct semaphore * sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * the wait_queue_head. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_UNINTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - tsk->state = TASK_RUNNING; -} - -static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * With signals pending, this turns into - * the trylock failure case - we won't be - * sleeping, and we* can't get the lock as - * it has contention. Just correct the count - * and exit. - */ - if (signal_pending(current)) { - retval = -EINTR; - sem->sleepers = 0; - atomic_add(sleepers, &sem->count); - break; - } - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * wait_queue_head. The "-1" is because we're - * still hoping to get the semaphore. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_INTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - - tsk->state = TASK_RUNNING; - return retval; -} - -/* - * Trylock failed - make sure we correct for - * having decremented the count. - * - * We could have done the trylock with a - * single "cmpxchg" without failure cases, - * but then it wouldn't work on a 386. - */ -static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem) -{ - int sleepers; - unsigned long flags; - - spin_lock_irqsave(&sem->wait.lock, flags); - sleepers = sem->sleepers + 1; - sem->sleepers = 0; - - /* - * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock in the - * wait_queue_head. - */ - if (!atomic_add_negative(sleepers, &sem->count)) { - wake_up_locked(&sem->wait); - } - - spin_unlock_irqrestore(&sem->wait.lock, flags); - return 1; -} - - -/* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. These routines * need to convert that sequence back into the C sequence when diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index af4de58cab5..f3d808451d2 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -82,19 +82,19 @@ EXPORT_SYMBOL(efi_enabled); /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -#ifdef CONFIG_ACPI_INTERPRETER +#ifdef CONFIG_ACPI int acpi_disabled = 0; #else int acpi_disabled = 1; #endif EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI int __initdata acpi_force = 0; extern acpi_interrupt_flags acpi_sci_flags; #endif @@ -139,6 +139,7 @@ struct sys_desc_table_struct { unsigned char table[0]; }; struct edid_info edid_info; +EXPORT_SYMBOL_GPL(edid_info); struct ist_info ist_info; #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) @@ -370,12 +371,16 @@ static void __init limit_regions(unsigned long long size) int i; if (efi_enabled) { - for (i = 0; i < memmap.nr_map; i++) { - current_addr = memmap.map[i].phys_addr + - (memmap.map[i].num_pages << 12); - if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) { + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map, i = 0; p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + current_addr = md->phys_addr + (md->num_pages << 12); + if (md->type == EFI_CONVENTIONAL_MEMORY) { if (current_addr >= size) { - memmap.map[i].num_pages -= + md->num_pages -= (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); memmap.nr_map = i + 1; return; @@ -794,7 +799,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } #endif -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter */ else if (!memcmp(from, "acpi=off", 8)) { disable_acpi(); @@ -850,7 +855,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) else if (!memcmp(from, "noapic", 6)) disable_ioapic_setup(); #endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* CONFIG_ACPI_BOOT */ +#endif /* CONFIG_ACPI */ #ifdef CONFIG_X86_LOCAL_APIC /* enable local APIC */ @@ -1295,7 +1300,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat */ static void __init register_memory(void) { - unsigned long gapstart, gapsize; + unsigned long gapstart, gapsize, round; unsigned long long last; int i; @@ -1340,14 +1345,14 @@ static void __init register_memory(void) } /* - * Start allocating dynamic PCI memory a bit into the gap, - * aligned up to the nearest megabyte. - * - * Question: should we try to pad it up a bit (do something - * like " + (gapsize >> 3)" in there too?). We now have the - * technology. + * See how much we want to round up: start off with + * rounding to the next 1MB area. */ - pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", pci_mem_start, gapstart, gapsize); @@ -1575,14 +1580,20 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. */ acpi_boot_table_init(); acpi_boot_init(); -#endif +#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) + if (def_to_bigsmp) + printk(KERN_WARNING "More than 8 CPUs detected and " + "CONFIG_X86_PC cannot handle it.\nUse " + "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); +#endif +#endif #ifdef CONFIG_X86_LOCAL_APIC if (smp_found_config) get_smp_config(); diff --git a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h index d21b14f5c25..0b2221711da 100644 --- a/arch/i386/kernel/sigframe.h +++ b/arch/i386/kernel/sigframe.h @@ -1,6 +1,6 @@ struct sigframe { - char *pretcode; + char __user *pretcode; int sig; struct sigcontext sc; struct _fpstate fpstate; @@ -10,10 +10,10 @@ struct sigframe struct rt_sigframe { - char *pretcode; + char __user *pretcode; int sig; - struct siginfo *pinfo; - void *puc; + struct siginfo __user *pinfo; + void __user *puc; struct siginfo info; struct ucontext uc; struct _fpstate fpstate; diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 140e340569c..61eb0c8a6e4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -278,9 +278,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, int tmp, err = 0; tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + savesegment(gs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + savesegment(fs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->fs); err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); @@ -604,7 +604,9 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything - * if so. + * if so. vm86 regs switched out by assembly code + * before reaching here, so testing against kernel + * CS suffices. */ if (!user_mode(regs)) return 1; diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index cec4bde6716..48b55db3680 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy) local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) - for(;;) __asm__("hlt"); + for(;;) halt(); for (;;); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 8ac8e9fd561..5e4893d2b9f 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -88,6 +88,8 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there @@ -1017,8 +1019,8 @@ int __devinit smp_prepare_cpu(int cpu) tsc_sync_disabled = 1; /* init low mem mapping */ - memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + KERNEL_PGD_PTRS); flush_tlb_all(); schedule_work(&task); wait_for_completion(&done); @@ -1265,6 +1267,7 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); cpu_set(smp_processor_id(), cpu_present_map); + cpu_set(smp_processor_id(), cpu_possible_map); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 0ee9dee8af0..2883a4d4f01 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -194,10 +194,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; @@ -252,8 +249,7 @@ EXPORT_SYMBOL(profile_pc); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static inline void do_timer_interrupt(int irq, struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -307,7 +303,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) cur_timer->mark_offset(); - do_timer_interrupt(irq, NULL, regs); + do_timer_interrupt(irq, regs); write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -333,8 +329,7 @@ EXPORT_SYMBOL(get_cmos_time); static void sync_cmos_clock(unsigned long dummy); -static struct timer_list sync_cmos_timer = - TIMER_INITIALIZER(sync_cmos_clock, 0, 0); +static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); static void sync_cmos_clock(unsigned long dummy) { @@ -348,7 +343,7 @@ static void sync_cmos_clock(unsigned long dummy) * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... */ - if ((time_status & STA_UNSYNC) != 0) + if (!ntp_synced()) /* * Not synced, exit, do not restart a timer (if one is * running, let it run out). @@ -383,6 +378,7 @@ void notify_arch_cmos_timer(void) static long clock_cmos_diff, sleep_start; +static struct timer_opts *last_timer; static int timer_suspend(struct sys_device *dev, pm_message_t state) { /* @@ -391,6 +387,10 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state) clock_cmos_diff = -get_cmos_time(); clock_cmos_diff += get_seconds(); sleep_start = get_cmos_time(); + last_timer = cur_timer; + cur_timer = &timer_none; + if (last_timer->suspend) + last_timer->suspend(state); return 0; } @@ -404,6 +404,7 @@ static int timer_resume(struct sys_device *dev) if (is_hpet_enabled()) hpet_reenable(); #endif + setup_pit_timer(); sec = get_cmos_time() + clock_cmos_diff; sleep_length = (get_cmos_time() - sleep_start) * HZ; write_seqlock_irqsave(&xtime_lock, flags); @@ -412,6 +413,11 @@ static int timer_resume(struct sys_device *dev) write_sequnlock_irqrestore(&xtime_lock, flags); jiffies += sleep_length; wall_jiffies += sleep_length; + if (last_timer->resume) + last_timer->resume(); + cur_timer = last_timer; + last_timer = NULL; + touch_softlockup_watchdog(); return 0; } diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index ef8dac5dd33..d973a8b681f 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,8 +18,8 @@ #include "mach_timer.h" #include <asm/hpet.h> -static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ +static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ +static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ @@ -136,6 +136,8 @@ static void delay_hpet(unsigned long loops) } while ((hpet_end - hpet_start) < (loops)); } +static struct timer_opts timer_hpet; + static int __init init_hpet(char* override) { unsigned long result, remain; @@ -163,6 +165,8 @@ static int __init init_hpet(char* override) } set_cyc2ns_scale(cpu_khz/1000); } + /* set this only when cpu_has_tsc */ + timer_hpet.read_timer = read_timer_tsc; } /* @@ -177,6 +181,19 @@ static int __init init_hpet(char* override) return 0; } +static int hpet_resume(void) +{ + write_seqlock(&monotonic_lock); + /* Assume this is the last mark offset time */ + rdtsc(last_tsc_low, last_tsc_high); + + if (hpet_use_timer) + hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + hpet_last = hpet_readl(HPET_COUNTER); + write_sequnlock(&monotonic_lock); + return 0; +} /************************************************************/ /* tsc timer_opts struct */ @@ -186,7 +203,7 @@ static struct timer_opts timer_hpet __read_mostly = { .get_offset = get_offset_hpet, .monotonic_clock = monotonic_clock_hpet, .delay = delay_hpet, - .read_timer = read_timer_tsc, + .resume = hpet_resume, }; struct init_timer_opts __initdata timer_hpet_init = { diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index 06de036a820..eddb6403823 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -175,30 +175,3 @@ void setup_pit_timer(void) outb(LATCH >> 8 , PIT_CH0); /* MSB */ spin_unlock_irqrestore(&i8253_lock, flags); } - -static int timer_resume(struct sys_device *dev) -{ - setup_pit_timer(); - return 0; -} - -static struct sysdev_class timer_sysclass = { - set_kset_name("timer_pit"), - .resume = timer_resume, -}; - -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int __init init_timer_sysfs(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(init_timer_sysfs); - diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 4ef20e66349..264edaaac31 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -186,6 +186,14 @@ static void mark_offset_pmtmr(void) } } +static int pmtmr_resume(void) +{ + write_seqlock(&monotonic_lock); + /* Assume this is the last mark offset time */ + offset_tick = read_pmtmr(); + write_sequnlock(&monotonic_lock); + return 0; +} static unsigned long long monotonic_clock_pmtmr(void) { @@ -247,6 +255,7 @@ static struct timer_opts timer_pmtmr = { .monotonic_clock = monotonic_clock_pmtmr, .delay = delay_pmtmr, .read_timer = read_timer_tsc, + .resume = pmtmr_resume, }; struct init_timer_opts __initdata timer_pmtmr_init = { diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 8f4e4d5bc56..6dd470cc9f7 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -543,6 +543,19 @@ static int __init init_tsc(char* override) return -ENODEV; } +static int tsc_resume(void) +{ + write_seqlock(&monotonic_lock); + /* Assume this is the last mark offset time */ + rdtsc(last_tsc_low, last_tsc_high); +#ifdef CONFIG_HPET_TIMER + if (is_hpet_enabled() && hpet_use_timer) + hpet_last = hpet_readl(HPET_COUNTER); +#endif + write_sequnlock(&monotonic_lock); + return 0; +} + #ifndef CONFIG_X86_TSC /* disable flag for tsc. Takes effect by clearing the TSC cpu flag * in cpu/common.c */ @@ -573,6 +586,7 @@ static struct timer_opts timer_tsc = { .monotonic_clock = monotonic_clock_tsc, .delay = delay_tsc, .read_timer = read_timer_tsc, + .resume = tsc_resume, }; struct init_timer_opts __initdata timer_tsc_init = { diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index cd2d5d5514f..09a58cb6daa 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs) unsigned short ss; esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; + savesegment(ss, ss); if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; @@ -267,9 +267,6 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (user_mode(regs)) - goto no_bug; /* Not in kernel */ - eip = regs->eip; if (eip < PAGE_OFFSET) @@ -366,8 +363,9 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e die(str, regs, err); } -static void do_trap(int trapnr, int signr, char *str, int vm86, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, + struct pt_regs * regs, long error_code, + siginfo_t *info) { struct task_struct *tsk = current; tsk->thread.error_code = error_code; @@ -463,7 +461,8 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) -fastcall void do_general_protection(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -568,6 +567,10 @@ static DEFINE_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == + NOTIFY_STOP) + return; + spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try @@ -656,7 +659,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - if (!nmi_callback(regs, cpu)) + if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); @@ -664,7 +667,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); @@ -675,7 +678,7 @@ void unset_nmi_callback(void) EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES -fastcall void do_int3(struct pt_regs *regs, long error_code) +fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) @@ -709,7 +712,7 @@ fastcall void do_int3(struct pt_regs *regs, long error_code) * find every occurrence of the TF bit that could be saved away even * by user code) */ -fastcall void do_debug(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) { unsigned int condition; struct task_struct *tsk = current; @@ -1008,7 +1011,7 @@ void __init trap_init_f00f_bug(void) * it uses the read-only mapped virtual address. */ idt_descr.address = fix_to_virt(FIX_F00F_IDT); - __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + load_idt(&idt_descr); } #endif diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index ec0f68ce688..16b48500962 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk */ info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; - asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs)); - asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs)); + savesegment(fs, tsk->thread.saved_fs); + savesegment(gs, tsk->thread.saved_gs); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; @@ -542,7 +542,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) unsigned char opcode; unsigned char __user *csp; unsigned char __user *ssp; - unsigned short ip, sp; + unsigned short ip, sp, orig_flags; int data32, pref_done; #define CHECK_IF_IN_TRAP \ @@ -551,8 +551,12 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) #define VM86_FAULT_RETURN do { \ if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ return_to_32bit(regs, VM86_PICRETURN); \ + if (orig_flags & TF_MASK) \ + handle_vm86_trap(regs, 0, 1); \ return; } while (0) + orig_flags = *(unsigned short *)®s->eflags; + csp = (unsigned char __user *) (regs->cs << 4); ssp = (unsigned char __user *) (regs->ss << 4); sp = SP(regs); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 761972f8cb6..13b9c62cbbb 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -22,6 +22,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S index c8fcf75b9be..fadb5bc3c37 100644 --- a/arch/i386/kernel/vsyscall-sigreturn.S +++ b/arch/i386/kernel/vsyscall-sigreturn.S @@ -7,7 +7,7 @@ */ #include <asm/unistd.h> -#include <asm/asm_offsets.h> +#include <asm/asm-offsets.h> /* XXX @@ -15,7 +15,7 @@ */ .text - .org __kernel_vsyscall+32 + .org __kernel_vsyscall+32,0x90 .globl __kernel_sigreturn .type __kernel_sigreturn,@function __kernel_sigreturn: @@ -35,6 +35,7 @@ __kernel_rt_sigreturn: int $0x80 .LEND_rt_sigreturn: .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + .balign 32 .previous .section .eh_frame,"a",@progbits diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index a7977707c8e..98699ca6e52 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S @@ -3,7 +3,7 @@ * object prelinked to its virtual address, and with only one read-only * segment (that fits in one page). This script controls its layout. */ -#include <asm/asm_offsets.h> +#include <asm/asm-offsets.h> SECTIONS { |