From 3b08606dc2991bcdab14139efd9ed9d492f5f901 Mon Sep 17 00:00:00 2001 From: keith mannthey Date: Fri, 29 Sep 2006 01:58:46 -0700 Subject: [PATCH] convert i386 Summit subarch to use SRAT info for apicid_to_node calls Convert the i386 summit subarch apicid_to_node to use node information provided by the SRAT. It was discussed a little on LKML a few weeks ago and was seen as an acceptable fix. The current way of obtaining the nodeid static inline int apicid_to_node(int logical_apicid) { return logical_apicid >> 5; } is just not correct for all summit systems/bios. Assuming the apicid matches the Linux node number require a leap of faith that the bios mapped out the apicids a set way. Modern summit HW (IBM x460) does not layout its bios in the manner for various reasons and is unable to boot i386 numa. The best way to get the correct apicid to node information is from the SRAT table during boot. It lays out what apicid belongs to what node. I use this information to create a table for use at run time. Signed-off-by: Keith Mannthey Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 5 ++++- arch/i386/kernel/srat.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 020d873b7d2..82b26d5ce47 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -102,6 +102,8 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); +u8 apicid_2_node[MAX_APICID]; + /* * Trampoline 80x86 program as an array. */ @@ -645,7 +647,7 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); - int node = apicid_to_node(apicid); + int node = apicid_to_node(hard_smp_processor_id()); if (!node_online(node)) node = first_online_node; @@ -954,6 +956,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu) irq_ctx_init(cpu); + x86_cpu_to_apicid[cpu] = apicid; /* * This grunge runs the startup process for * the targeted processor. diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 32413122c4c..f7e735c077c 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * proximity macros and definitions @@ -54,6 +55,7 @@ struct node_memory_chunk_s { static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; static int num_memory_chunks; /* total number of memory chunks */ +static u8 __initdata apicid_to_pxm[MAX_APICID]; extern void * boot_ioremap(unsigned long, unsigned long); @@ -69,6 +71,8 @@ static void __init parse_cpu_affinity_structure(char *p) /* mark this node as "seen" in node bitmap */ BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain); + apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain; + printk("CPU 0x%02X in proximity domain 0x%02X\n", cpu_affinity->apic_id, cpu_affinity->proximity_domain); } @@ -235,6 +239,9 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) printk("Number of logical nodes in system = %d\n", num_online_nodes()); printk("Number of memory chunks in system = %d\n", num_memory_chunks); + for (i = 0; i < MAX_APICID; i++) + apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); + for (j = 0; j < num_memory_chunks; j++){ struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", -- cgit From df67b3daea602728b51325a4debaeeb912ee51d1 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 29 Sep 2006 01:58:58 -0700 Subject: [PATCH] make PROT_WRITE imply PROT_READ Make PROT_WRITE imply PROT_READ for a number of architectures which don't support write only in hardware. While looking at this, I noticed that some architectures which do not support write only mappings already take the exact same approach. For example, in arch/alpha/mm/fault.c: " if (cause < 0) { if (!(vma->vm_flags & VM_EXEC)) goto bad_area; } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) goto bad_area; } else { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } " Thus, this patch brings other architectures which do not support write only mappings in-line and consistent with the rest. I've verified the patch on ia64, x86_64 and x86. Additional discussion: Several architectures, including x86, can not support write-only mappings. The pte for x86 reserves a single bit for protection and its two states are read only or read/write. Thus, write only is not supported in h/w. Currently, if i 'mmap' a page write-only, the first read attempt on that page creates a page fault and will SEGV. That check is enforced in arch/blah/mm/fault.c. However, if i first write that page it will fault in and the pte will be set to read/write. Thus, any subsequent reads to the page will succeed. It is this inconsistency in behavior that this patch is attempting to address. Furthermore, if the page is swapped out, and then brought back the first read will also cause a SEGV. Thus, any arbitrary read on a page can potentially result in a SEGV. According to the SuSv3 spec, "if the application requests only PROT_WRITE, the implementation may also allow read access." Also as mentioned, some archtectures, such as alpha, shown above already take the approach that i am suggesting. The counter-argument to this raised by Arjan, is that the kernel is enforcing the write only mapping the best it can given the h/w limitations. This is true, however Alan Cox, and myself would argue that the inconsitency in behavior, that is applications can sometimes work/sometimes fails is highly undesireable. If you read through the thread, i think people, came to an agreement on the last patch i posted, as nobody has objected to it... Signed-off-by: Jason Baron Cc: Russell King Cc: "Luck, Tony" Cc: Hugh Dickins Cc: Roman Zippel Cc: Geert Uytterhoeven Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Acked-by: Andi Kleen Acked-by: Alan Cox Cc: Arjan van de Ven Acked-by: Paul Mundt Cc: Kazumoto Kojima Cc: Ian Molton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 5e17a3f43b4..50d8617391d 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -440,7 +440,7 @@ good_area: case 1: /* read, present */ goto bad_area; case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; } -- cgit From b0de7fca10a3d7ddb99188b4752cc473c4e2c96e Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 29 Sep 2006 01:59:23 -0700 Subject: [PATCH] efi: add lock annotations for efi_call_phys_prelog and efi_call_phys_epilog The functions efi_call_phys_prelog and efi_call_phys_epilog in arch/i386/kernel/efi.c wrap the spinlock efi_rt_lock: efi_call_phys_prelog returns with the lock held, and efi_call_phys_epilog releases the lock without acquiring it. Add lock annotations to these two functions so that sparse can check callers for lock pairing, and so that sparse will not complain about these functions since they intentionally use locks in this manner. Signed-off-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/efi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index fe158042110..f9436989473 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -65,7 +65,7 @@ static unsigned long efi_rt_eflags; static DEFINE_SPINLOCK(efi_rt_lock); static pgd_t efi_bak_pg_dir_pointer[2]; -static void efi_call_phys_prelog(void) +static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) { unsigned long cr4; unsigned long temp; @@ -109,7 +109,7 @@ static void efi_call_phys_prelog(void) load_gdt(cpu_gdt_descr); } -static void efi_call_phys_epilog(void) +static void efi_call_phys_epilog(void) __releases(efi_rt_lock) { unsigned long cr4; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); -- cgit From 6b77df08a36d989f7dd00ccb6a026a0e96170d16 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 29 Sep 2006 02:00:01 -0700 Subject: [PATCH] oprofile: ppro: need to enable/disable all the counters Need to enable/disable all the counters instead of just counter 0. This affects all cpus with family=6, including i386/core. Usual symptom: only counter 0 provides samples. Other counters don't produce samples. Signed-off-by: Arun Sharma Cc: Philippe Elie Cc: John Levon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/oprofile/op_model_ppro.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index f88e05ba8eb..ca2447e05e1 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c @@ -138,11 +138,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs, static void ppro_start(struct op_msrs const * const msrs) { unsigned int low,high; + int i; - if (reset_value[0]) { - CTRL_READ(low, high, msrs, 0); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, 0); + for (i = 0; i < NUM_COUNTERS; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } } } @@ -150,11 +153,14 @@ static void ppro_start(struct op_msrs const * const msrs) static void ppro_stop(struct op_msrs const * const msrs) { unsigned int low,high; + int i; - if (reset_value[0]) { - CTRL_READ(low, high, msrs, 0); + for (i = 0; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, 0); + CTRL_WRITE(low, high, msrs, i); } } -- cgit From fc09561d6392771a392dea55c287de7e849b6b63 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 29 Sep 2006 02:00:04 -0700 Subject: [PATCH] kthread: convert arch/i386/kernel/apm.c Convert i386 apm.c from kernel_thread(), whose export is deprecated, to kthread API. Signed-off-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index ff9ce4b5eaa..b42f2d914af 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -225,6 +225,7 @@ #include #include #include +#include #include #include @@ -402,8 +403,6 @@ static int realmode_power_off = 1; #else static int realmode_power_off; #endif -static int exit_kapmd __read_mostly; -static int kapmd_running __read_mostly; #ifdef CONFIG_APM_ALLOW_INTS static int allow_ints = 1; #else @@ -419,6 +418,8 @@ static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; static const char driver_version[] = "1.16ac"; /* no spaces */ +static struct task_struct *kapmd_task; + /* * APM event names taken from the APM 1.2 specification. These are * the message codes that the BIOS uses to tell us about events @@ -1423,7 +1424,7 @@ static void apm_mainloop(void) set_current_state(TASK_INTERRUPTIBLE); for (;;) { schedule_timeout(APM_CHECK_TIMEOUT); - if (exit_kapmd) + if (kthread_should_stop()) break; /* * Ok, check all events, check for idle (and mark us sleeping @@ -1706,12 +1707,6 @@ static int apm(void *unused) char * power_stat; char * bat_stat; - kapmd_running = 1; - - daemonize("kapmd"); - - current->flags |= PF_NOFREEZE; - #ifdef CONFIG_SMP /* 2002/08/01 - WT * This is to avoid random crashes at boot time during initialization @@ -1821,7 +1816,6 @@ static int apm(void *unused) console_blank_hook = NULL; #endif } - kapmd_running = 0; return 0; } @@ -2220,7 +2214,7 @@ static int __init apm_init(void) { struct proc_dir_entry *apm_proc; struct desc_struct *gdt; - int ret; + int err; dmi_check_system(apm_dmi_table); @@ -2329,12 +2323,17 @@ static int __init apm_init(void) if (apm_proc) apm_proc->owner = THIS_MODULE; - ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); - if (ret < 0) { - printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); + kapmd_task = kthread_create(apm, NULL, "kapmd"); + if (IS_ERR(kapmd_task)) { + printk(KERN_ERR "apm: disabled - Unable to start kernel " + "thread.\n"); + err = PTR_ERR(kapmd_task); + kapmd_task = NULL; remove_proc_entry("apm", NULL); - return -ENOMEM; + return err; } + kapmd_task->flags |= PF_NOFREEZE; + wake_up_process(kapmd_task); if (num_online_cpus() > 1 && !smp ) { printk(KERN_NOTICE @@ -2384,9 +2383,10 @@ static void __exit apm_exit(void) remove_proc_entry("apm", NULL); if (power_off) pm_power_off = NULL; - exit_kapmd = 1; - while (kapmd_running) - schedule(); + if (kapmd_task) { + kthread_stop(kapmd_task); + kapmd_task = NULL; + } #ifdef CONFIG_PM_LEGACY pm_active = 0; #endif -- cgit From f400e198b2ed26ce55b22a1412ded0896e7516ac Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 29 Sep 2006 02:00:07 -0700 Subject: [PATCH] pidspace: is_init() This is an updated version of Eric Biederman's is_init() patch. (http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and replaces a few more instances of ->pid == 1 with is_init(). Further, is_init() checks pid and thus removes dependency on Eric's other patches for now. Eric's original description: There are a lot of places in the kernel where we test for init because we give it special properties. Most significantly init must not die. This results in code all over the kernel test ->pid == 1. Introduce is_init to capture this case. With multiple pid spaces for all of the cases affected we are looking for only the first process on the system, not some other process that has pid == 1. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Dave Hansen Cc: Serge Hallyn Cc: Cedric Le Goater Cc: Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/lib/usercopy.c | 2 +- arch/i386/mm/fault.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index efc7e7d5f4d..08502fc6d0c 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -739,7 +739,7 @@ survive: retval = get_user_pages(current, current->mm, (unsigned long )to, 1, 1, 0, &pg, NULL); - if (retval == -ENOMEM && current->pid == 1) { + if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); blk_congestion_wait(WRITE, HZ/50); goto survive; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 50d8617391d..2581575786c 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -589,7 +589,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; -- cgit From ae853e79faa27ca1a08c8ae3b5aee71ea98482f9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Sep 2006 01:47:55 +0200 Subject: [PATCH] i386: Update defconfig Signed-off-by: Andi Kleen --- arch/i386/defconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 1a29bfa26d0..ee2d79bd8af 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-git5 -# Tue Sep 26 09:30:47 2006 +# Linux kernel version: 2.6.18-git7 +# Wed Sep 27 21:53:10 2006 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y @@ -210,6 +210,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_PM=y CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set +CONFIG_PM_SYSFS_DEPRECATED=y # # ACPI (Advanced Configuration and Power Interface) Support @@ -292,6 +293,7 @@ CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y # CONFIG_PCIEPORTBUS is not set CONFIG_PCI_MSI=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set # CONFIG_PCI_DEBUG is not set CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set @@ -1427,6 +1429,7 @@ CONFIG_KPROBES=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_KERNEL=y -- cgit From 29cbc78b90a73ad80f2f58ba2927956cf663abed Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Sep 2006 01:47:55 +0200 Subject: [PATCH] x86: Clean up x86 NMI sysctls Use prototypes in headers Don't define panic_on_unrecovered_nmi for all architectures Cc: dzickus@redhat.com Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 3 +++ arch/i386/kernel/traps.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index dbda706fdd1..0fc4997fb14 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -31,6 +31,9 @@ #include "mach_traps.h" +int unknown_nmi_panic; +int nmi_watchdog_enabled; + /* perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index a13037fe0ee..6820b8d643c 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -57,6 +57,8 @@ #include "mach_traps.h" +int panic_on_unrecovered_nmi; + asmlinkage int system_call(void); struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, -- cgit From b9629b82c81709eb0f9c98b8f9cc684f8e212ef3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 30 Sep 2006 01:47:55 +0200 Subject: [PATCH] i386: replace intermediate array-size definitions with ARRAY_SIZE() Code is easier to validate if array sizes aren't hidden behind extra #defines. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen --- arch/i386/kernel/setup.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 814cdebf737..000cf03751f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -209,9 +209,6 @@ static struct resource adapter_rom_resources[] = { { .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM } }; -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, @@ -273,9 +270,6 @@ static struct resource standard_io_resources[] = { { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) static int __init romchecksum(unsigned char *rom, unsigned long length) @@ -332,7 +326,7 @@ static void __init probe_roms(void) } /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { rom = isa_bus_to_virt(start); if (!romsignature(rom)) continue; @@ -1272,7 +1266,7 @@ static int __init request_standard_resources(void) request_resource(&iomem_resource, &video_ram_resource); /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) request_resource(&ioport_resource, &standard_io_resources[i]); return 0; } -- cgit