From 5d8b532af9e52ea89208f5ef31889f646e67ba28 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 16 Jan 2009 23:09:14 +0100
Subject: ACPI suspend: Fix compilation warnings in drivers/acpi/sleep.c

Fix two compilation warnings in drivers/acpi/sleep.c, one triggered
by unsetting CONFIG_SUSPEND and the other triggered by unsetting
CONFIG_HIBERNATION, by moving some code under the appropriate
#ifdefs.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/sleep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95f..a60c1f3bcb8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -156,11 +156,11 @@ static int __init acpi_sleep_setup(char *str)
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
 			acpi_no_s4_hw_signature();
+		if (strncmp(str, "s4_nonvs", 8) == 0)
+			acpi_s4_no_nvs();
 #endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
-		if (strncmp(str, "s4_nonvs", 8) == 0)
-			acpi_s4_no_nvs();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
-- 
cgit 


From 92ab78315c638515d0e81b0c70b2082f713582d9 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sat, 31 Jan 2009 17:24:43 +0100
Subject: x86/Voyager: make it build and boot

[
  mingo@elte.hu: these fixes are a subset of changes cherry-picked from:

     git://git.kernel.org:/pub/scm/linux/kernel/git/jejb/voyager-2.6.git

  They fix various problems that recent x86 changes caused in the Voyager
  subarchitecture: both APIC changes and cpumask changes and certain
  cleanups caused subarch assumptions to break.

  Most of these changes are obsolete as the subarch code has been removed
  from the x86 development tree - but we merge them upstream to make Voyager
  build and boot.
]

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irqinit_32.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674..10a09c2f182 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -78,15 +78,6 @@ void __init init_ISA_irqs(void)
 	}
 }
 
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
-	.handler = no_action,
-	.mask = CPU_MASK_NONE,
-	.name = "cascade",
-};
-
 DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[0 ... IRQ0_VECTOR - 1] = -1,
 	[IRQ0_VECTOR] = 0,
@@ -178,9 +169,6 @@ void __init native_init_IRQ(void)
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 
-	if (!acpi_ioapic)
-		setup_irq(2, &irq2);
-
 	/* setup after call gates are initialised (usually add in
 	 * the architecture specific gates)
 	 */
-- 
cgit 


From 9a8ecae87a2b698964b1db9ea504ba1099f479fc Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sat, 31 Jan 2009 20:12:14 -0500
Subject: x86: add cache descriptors for Intel Core i7

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 48533d77be7..da299eb85fc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -36,8 +36,11 @@ static struct _cache_table cache_table[] __cpuinitdata =
 {
 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
@@ -85,6 +88,18 @@ static struct _cache_table cache_table[] __cpuinitdata =
 	{ 0x85, LVL_2,    2048 },	/* 8-way set assoc, 32 byte line size */
 	{ 0x86, LVL_2,     512 },	/* 4-way set assoc, 64 byte line size */
 	{ 0x87, LVL_2,    1024 },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd0, LVL_3,     512 },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd1, LVL_3,    1024 },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd2, LVL_3,    2048 },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd6, LVL_3,    1024 },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd7, LVL_3,    2048 },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd8, LVL_3,    4096 },	/* 12-way set assoc, 64 byte line size */
+	{ 0xdc, LVL_3,    2048 },	/* 12-way set assoc, 64 byte line size */
+	{ 0xdd, LVL_3,    4096 },	/* 12-way set assoc, 64 byte line size */
+	{ 0xde, LVL_3,    8192 },	/* 12-way set assoc, 64 byte line size */
+	{ 0xe2, LVL_3,    2048 },	/* 16-way set assoc, 64 byte line size */
+	{ 0xe3, LVL_3,    4096 },	/* 16-way set assoc, 64 byte line size */
+	{ 0xe4, LVL_3,    8192 },	/* 16-way set assoc, 64 byte line size */
 	{ 0x00, 0, 0}
 };
 
-- 
cgit 


From 10b888d6cec2688e65e9e128b14bf98ecd199da2 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 31 Jan 2009 14:50:07 -0800
Subject: irq, x86: fix lock status with numa_migrate_irq_desc

Eric Paris reported:

> I have an hp dl785g5 which is unable to successfully run
> 2.6.29-0.66.rc3.fc11.x86_64 or 2.6.29-rc2-next-20090126.  During bootup
> (early in userspace daemons starting) I get the below BUG, which quickly
> renders the machine dead.  I assume it is because sparse_irq_lock never
> gets released when the BUG kills that task.

Adjust lock sequence when migrating a descriptor with
CONFIG_NUMA_MIGRATE_IRQ_DESC enabled.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 1c4a1302536..9b0c480c383 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2528,14 +2528,15 @@ static void irq_complete_move(struct irq_desc **descp)
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
+
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
 #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
 		*descp = desc = move_irq_desc(desc, me);
 		/* get the new one */
 		cfg = desc->chip_data;
 #endif
-
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
+	}
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
-- 
cgit 


From a67798cd7bb130bf37f5ffb28f3260f4c10232db Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Fri, 30 Jan 2009 10:50:54 -0600
Subject: x86: push old stack address on irqstack for unwinder

Impact: Fixes dumpstack and KDB on 64 bits

This re-adds the old stack pointer to the top of the irqstack to help
with unwinding.  It was removed in commit d99015b1abbad743aa049b439c1e1dede6d0fa49
as part of the save_args out-of-line work.

Both dumpstack and KDB require this information.

Signed-off-by: Martin Hicks <mort@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/entry_64.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e28c7a98779..a1346217e43 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -346,6 +346,7 @@ ENTRY(save_args)
 	popq_cfi %rax			/* move return address... */
 	mov %gs:pda_irqstackptr,%rsp
 	EMPTY_FRAME 0
+	pushq_cfi %rbp			/* backlink for unwinder */
 	pushq_cfi %rax			/* ... to the new stack */
 	/*
 	 * We entered an interrupt context - irqs are off:
-- 
cgit 


From 858770619debfb9269add63e4ba8b7c6b5538dd1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 3 Feb 2009 16:24:22 +0100
Subject: x86: APIC: enable workaround on AMD Fam10h CPUs

Impact: fix to enable APIC for AMD Fam10h on chipsets with a missing/b0rked
	ACPI MP table (MADT)

Booting a 32bit kernel on an AMD Fam10h CPU running on chipsets with
missing/b0rked MP table leads to a hang pretty early in the boot process
due to the APIC not being initialized. Fix that by falling back to the
default APIC base address in 32bit code, as it is done in the 64bit
codepath.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 4b6df2469fe..115449f869e 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1436,7 +1436,7 @@ static int __init detect_init_APIC(void)
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
 		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-		    (boot_cpu_data.x86 == 15))
+		    (boot_cpu_data.x86 >= 15))
 			break;
 		goto no_apic;
 	case X86_VENDOR_INTEL:
-- 
cgit 


From 62663ea8220366472fe20462831f2d69d7987439 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 3 Feb 2009 17:46:46 +0100
Subject: ACPI: cpufreq: Remove deprecated /proc/acpi/processor/../performance
 proc entries

They have been marked deprecated for long enough...

Update Documentation/cpu-freq/users-guide.txt:
The deprecated files listed there seem not to have existed for some time
already.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/cpu/cpufreq/Kconfig | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index efae3b22a0f..65792c2cc46 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -245,17 +245,6 @@ config X86_E_POWERSAVER
 
 comment "shared options"
 
-config X86_ACPI_CPUFREQ_PROC_INTF
-	bool "/proc/acpi/processor/../performance interface (deprecated)"
-	depends on PROC_FS
-	depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
-	help
-	  This enables the deprecated /proc/acpi/processor/../performance
-	  interface. While it is helpful for debugging, the generic,
-	  cross-architecture cpufreq interfaces should be used.
-
-	  If in doubt, say N.
-
 config X86_SPEEDSTEP_LIB
 	tristate
 	default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
-- 
cgit 


From 48ec4d9537282a55d602136724f069faafcac8c8 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@infradead.org>
Date: Wed, 4 Feb 2009 15:54:45 -0500
Subject: x86, 64-bit: print DMI info in the oops trace

This patch echoes what we already do on 32-bit since
90f7d25c6b672137344f447a30a9159945ffea72, and prints the DMI
product name in show_regs, so that system specific problems can be
easily identified.

Signed-off-by: Kyle McMartin <kyle@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_64.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4..85b4cb5c198 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -40,6 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <linux/dmi.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned long d0, d1, d2, d3, d6, d7;
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
+	const char *board;
 
 	printk("\n");
 	print_modules();
-	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
+	board = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (!board)
+		board = "";
+	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
+		init_utsname()->version, board);
 	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 	printk_address(regs->ip, 1);
 	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
-- 
cgit 


From a6a95406c676ffe4f9dee708eb404a17c69f7fdd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 4 Feb 2009 13:40:31 +0300
Subject: x86: fix hpet timer reinit for x86_64

There's a small problem with hpet_rtc_reinit function - it checks
for the:

	hpet_readl(HPET_COUNTER) - hpet_t1_cmp > 0

to continue increasing both the HPET_T1_CMP (register) and the
hpet_t1_cmp (variable).

But since the HPET_COUNTER is always 32-bit, if the hpet_t1_cmp
is 64-bit this condition will always be FALSE once the latter hits
the 32-bit boundary, and we can have a situation, when we don't
increase the HPET_T1_CMP register high enough.

The result - the timer stops ticking, since HPET_T1_CMP becomes less
than the COUNTER and is never increased again.

The solution is (based on Linus's suggestion) to not compare 64-bits
(on 64-bit x86), but to do the comparison on 32-bit signed
integers.

Reported-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 64d5ad0b8ad..c761f914430 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1075,7 +1075,7 @@ static void hpet_rtc_timer_reinit(void)
 		hpet_t1_cmp += delta;
 		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
 		lost_ints++;
-	} while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
+	} while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
 
 	if (lost_ints) {
 		if (hpet_rtc_flags & RTC_PIE)
-- 
cgit 


From 4560839939f4b4a96e21e80584f87308ac93c1da Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Wed, 4 Feb 2009 16:44:01 -0700
Subject: x86: fix grammar in user-visible BIOS warning

Fix user-visible grammo.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf6..c461f6d6907 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
 	printk(KERN_NOTICE
-		"%s detected: BIOS may corrupt low RAM, working it around.\n",
+		"%s detected: BIOS may corrupt low RAM, working around it.\n",
 		d->ident);
 
 	e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
-- 
cgit 


From 732553e567c2700ba5b9bccc6ec885c75779a94b Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Tue, 3 Feb 2009 17:46:43 +0100
Subject: [CPUFREQ] powernow-k8: Get transition latency from ACPI _PSS table

At this time, the PowerNow! driver for K8 uses an experimentally
derived formula to calculate transition latency.  The value it
provides is orders of magnitude too large on modern systems.
This patch replaces the formula with ACPI _PSS latency values
for more accuracy and better performance.

I've tested it on two 2nd generation Opteron systems, a 3rd
generation Opteron system, and a Turion X2 without seeing any
stability problems.

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 5c28b37dea1..fb039cd345d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 	free_cpumask_var(data->acpi_data.shared_cpu_map);
 }
 
+static int get_transition_latency(struct powernow_k8_data *data)
+{
+	int max_latency = 0;
+	int i;
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		int cur_latency = data->acpi_data.states[i].transition_latency
+			+ data->acpi_data.states[i].bus_master_latency;
+		if (cur_latency > max_latency)
+			max_latency = cur_latency;
+	}
+	/* value in usecs, needs to be in nanoseconds */
+	return 1000 * max_latency;
+}
+
 #else
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
+static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
 #endif /* CONFIG_X86_POWERNOW_K8_ACPI */
 
 /* Take a frequency, and issue the fid/vid transition command */
@@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		if (rc) {
 			goto err_out;
 		}
-	}
+		/* Take a crude guess here.
+		 * That guess was in microseconds, so multiply with 1000 */
+		pol->cpuinfo.transition_latency = (
+			 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
+			 ((1 << data->irt) * 30)) * 1000;
+	} else /* ACPI _PSS objects available */
+		pol->cpuinfo.transition_latency = get_transition_latency(data);
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
@@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
 	data->available_cores = pol->cpus;
 
-	/* Take a crude guess here.
-	 * That guess was in microseconds, so multiply with 1000 */
-	pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
-	    + (3 * (1 << data->irt) * 10)) * 1000;
-
 	if (cpu_family == CPU_HW_PSTATE)
 		pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
 	else
-- 
cgit 


From ff08f76d738d0ec0f334b187f61e160caa321d54 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 4 Feb 2009 13:40:31 +0300
Subject: x86: clean up hpet timer reinit

Implement Linus's suggestion: introduce the hpet_cnt_ahead()
helper function to compare hpet time values - like other
wrapping counter comparisons are abstracted away elsewhere.
(jiffies, ktime_t, etc.)

Reported-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c761f914430..388254f69a2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -897,13 +897,21 @@ static unsigned long hpet_rtc_flags;
 static int hpet_prev_update_sec;
 static struct rtc_time hpet_alarm_time;
 static unsigned long hpet_pie_count;
-static unsigned long hpet_t1_cmp;
+static u32 hpet_t1_cmp;
 static unsigned long hpet_default_delta;
 static unsigned long hpet_pie_delta;
 static unsigned long hpet_pie_limit;
 
 static rtc_irq_handler irq_handler;
 
+/*
+ * Non-zero if hpet counter c1 is ahead of c2 (signed 32-bit difference)
+ */
+static inline int hpet_cnt_ahead(u32 c1, u32 c2)
+{
+	return (s32)(c2 - c1) < 0;
+}
+
 /*
  * Registers a IRQ handler.
  */
@@ -1075,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void)
 		hpet_t1_cmp += delta;
 		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
 		lost_ints++;
-	} while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
+	} while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER)));
 
 	if (lost_ints) {
 		if (hpet_rtc_flags & RTC_PIE)
-- 
cgit 


From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Fri, 6 Feb 2009 16:52:05 -0800
Subject: x86: add clflush before monitor for Intel 7400 series

For Intel 7400 series CPUs, the recommendation is to use a clflush on the
monitored address just before monitor and mwait pair [1].

This clflush makes sure that there are no false wakeups from mwait when the
monitored address was recently written to.

[1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series"
    section in specification update document of 7400 series
    http://download.intel.com/design/xeon/specupdt/32033601.pdf

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel.c | 3 +++
 arch/x86/kernel/process.c   | 6 ++++++
 2 files changed, 9 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 430e5c38a54..24ff26a38ad 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		ds_init_intel(c);
 	}
 
+	if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+		set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
+
 #ifdef CONFIG_X86_64
 	if (c->x86 == 15)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e68bb9e3086..6d12f7e37f8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 
 	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
+		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
@@ -194,6 +197,9 @@ static void mwait_idle(void)
 	struct power_trace it;
 	if (!need_resched()) {
 		trace_power_start(&it, POWER_CSTATE, 1);
+		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
-- 
cgit 


From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 8 Feb 2009 16:18:03 -0800
Subject: x86: find nr_irqs_gsi with mp_ioapic_routing

Impact: find right nr_irqs_gsi on some systems.

One test-system has gap between gsi's:

[    0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48])
[    0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54
[    0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56])
[    0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62
...
[    0.000000] nr_irqs_gsi: 38

So nr_irqs_gsi is not right: some IRQs for MSI will collide with io_apic ones.

We need to get it from acpi_probe_gsi() when the ACPI io_apic is used.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++
 arch/x86/kernel/io_apic.c   | 20 +++++++++++++++-----
 2 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f43..7678f10c456 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
+int __init acpi_probe_gsi(void)
+{
+	int idx;
+	int gsi;
+	int max_gsi = 0;
+
+	if (acpi_disabled)
+		return 0;
+
+	if (!acpi_ioapic)
+		return 0;
+
+	max_gsi = 0;
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		gsi = mp_ioapic_routing[idx].gsi_end;
+
+		if (gsi > max_gsi)
+			max_gsi = gsi;
+	}
+
+	return max_gsi + 1;
+}
+
 static void assign_to_mp_irq(struct mp_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9b0c480c383..bc7ac4da90d 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic)
 
 void __init probe_nr_irqs_gsi(void)
 {
-	int idx;
 	int nr = 0;
 
-	for (idx = 0; idx < nr_ioapics; idx++)
-		nr += io_apic_get_redir_entries(idx) + 1;
-
-	if (nr > nr_irqs_gsi)
+	nr = acpi_probe_gsi();
+	if (nr > nr_irqs_gsi) {
 		nr_irqs_gsi = nr;
+	} else {
+		/* for acpi=off or acpi is not compiled in */
+		int idx;
+
+		nr = 0;
+		for (idx = 0; idx < nr_ioapics; idx++)
+			nr += io_apic_get_redir_entries(idx) + 1;
+
+		if (nr > nr_irqs_gsi)
+			nr_irqs_gsi = nr;
+	}
+
+	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
 }
 
 /* --------------------------------------------------------------------------
-- 
cgit 


From 55a8ba4b7f76bebd7e8ce3f74c04b140627a1bad Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Fri, 6 Feb 2009 10:29:35 -0800
Subject: x86, vmi: put a missing paravirt_release_pmd in pgd_dtor

Commit 6194ba6ff6ccf8d5c54c857600843c67aa82c407 ("x86: don't special-case
pmd allocations as much") made changes to the way we handle pmd allocations,
and while doing that it dropped a call to  paravirt_release_pd on the
pgd page from the pgd_dtor code path.

As a result of this missing release, the hypervisor is now unaware of the
pgd page being freed, and it ends up tracking this page as a
page table page.

After this the guest may start using the same page for other purposes, and
depending on what use the page is put to, it may result in various performance
and/or functional issues ( hangs, reboots).

Since this release is only required for VMI, I now release the pgd page from
the (vmi)_pgd_free hook.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
---
 arch/x86/kernel/vmi_32.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 1d3302cc2dd..bef58b4982d 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,6 +320,16 @@ static void vmi_release_pmd(unsigned long pfn)
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 }
 
+/*
+ * We use the pgd_free hook for releasing the pgd page:
+ */
+static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
+
+	vmi_ops.release_page(pfn, VMI_PAGE_L2);
+}
+
 /*
  * Helper macros for MMU update flags.  We can defer updates until a flush
  * or page invalidation only if the update is to the current address space
@@ -762,6 +772,7 @@ static inline int __init activate_vmi(void)
 	if (vmi_ops.release_page) {
 		pv_mmu_ops.release_pte = vmi_release_pte;
 		pv_mmu_ops.release_pmd = vmi_release_pmd;
+		pv_mmu_ops.pgd_free = vmi_pgd_free;
 	}
 
 	/* Set linear is needed in all cases */
-- 
cgit 


From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:39 +0900
Subject: x86: fix math_emu register frame access

do_device_not_available() is the handler for #NM and it declares that
it takes an unsigned long and calls math_emulate(), which takes a long
argument and surprisingly expects that the stack frame starting at the
zeroth argument matches struct math_emu_info, which isn't true
regardless of configuration in the current code.

This patch makes do_device_not_available() take struct pt_regs like
other exception handlers and initialize struct math_emu_info with
pointer to it and pass pointer to the math_emu_info to math_emulate()
like normal C functions do.  This way, unless gcc makes a copy of
struct pt_regs in do_device_not_available(), the register frame is
correctly accessed regardless of kernel configuration or compiler
used.

This doesn't fix all math_emu problems but it at least gets it
somewhat working.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284..7932338d7cb 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -896,7 +896,7 @@ asmlinkage void math_state_restore(void)
 EXPORT_SYMBOL_GPL(math_state_restore);
 
 #ifndef CONFIG_MATH_EMULATION
-asmlinkage void math_emulate(long arg)
+void math_emulate(struct math_emu_info *info)
 {
 	printk(KERN_EMERG
 		"math-emulation not enabled and no coprocessor found.\n");
@@ -906,16 +906,19 @@ asmlinkage void math_emulate(long arg)
 }
 #endif /* CONFIG_MATH_EMULATION */
 
-dotraplinkage void __kprobes
-do_device_not_available(struct pt_regs *regs, long error)
+dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
 {
 #ifdef CONFIG_X86_32
 	if (read_cr0() & X86_CR0_EM) {
-		conditional_sti(regs);
-		math_emulate(0);
+		struct math_emu_info info = { };
+
+		conditional_sti(&regs);
+
+		info.regs = &regs;
+		math_emulate(&info);
 	} else {
 		math_state_restore(); /* interrupts still off */
-		conditional_sti(regs);
+		conditional_sti(&regs);
 	}
 #else
 	math_state_restore();
-- 
cgit 


From b52af40923fc91a12e3c7152d833e0c0c6a508f6 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Tue, 10 Feb 2009 09:21:07 +0100
Subject: i8237: fix outb() parameter order

In i8237A_resume(), when resetting the DMA controller, the parameters to
dma_outb() were mixed up.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
[ cleaned up the file a tiny bit. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8237.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index dbd6c1d1b63..b42ca694dc6 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev)
 
 	flags = claim_dma_lock();
 
-	dma_outb(DMA1_RESET_REG, 0);
-	dma_outb(DMA2_RESET_REG, 0);
+	dma_outb(0, DMA1_RESET_REG);
+	dma_outb(0, DMA2_RESET_REG);
 
-	for (i = 0;i < 8;i++) {
+	for (i = 0; i < 8; i++) {
 		set_dma_addr(i, 0x000000);
 		/* DMA count is a bit weird so this is not 0 */
 		set_dma_count(i, 1);
@@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
 }
 
 static struct sysdev_class i8237_sysdev_class = {
-	.name = "i8237",
-	.suspend = i8237A_suspend,
-	.resume = i8237A_resume,
+	.name		= "i8237",
+	.suspend	= i8237A_suspend,
+	.resume		= i8237A_resume,
 };
 
 static struct sys_device device_i8237A = {
-	.id	= 0,
-	.cls	= &i8237_sysdev_class,
+	.id		= 0,
+	.cls		= &i8237_sysdev_class,
 };
 
 static int __init i8237A_init_sysfs(void)
@@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void)
 		error = sysdev_register(&device_i8237A);
 	return error;
 }
-
 device_initcall(i8237A_init_sysfs);
-- 
cgit 


From e3944bfac961cd7fc82f3b3143c55dc375748569 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 10 Feb 2009 13:07:13 -0500
Subject: tracing, x86: fix fixup section to return to original code

Impact: fix to prevent a kernel crash on fault

If for some reason the pointer to the parent function on the
stack takes a fault, the fix up code will not return back to
the original faulting code. This can lead to unpredictable
results and perhaps even a kernel panic.

A fault should not happen, but if it does, we should simply
disable the tracer, warn, and continue running the kernel.
It should not lead to a kernel crash.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 arch/x86/kernel/ftrace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b43086b097..9d549e4fe88 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -491,13 +491,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
 		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
 		"   movl $0, %[faulted]\n"
+		"3:\n"
 
 		".section .fixup, \"ax\"\n"
-		"3: movl $1, %[faulted]\n"
+		"4: movl $1, %[faulted]\n"
+		"   jmp 3b\n"
 		".previous\n"
 
-		_ASM_EXTABLE(1b, 3b)
-		_ASM_EXTABLE(2b, 3b)
+		_ASM_EXTABLE(1b, 4b)
+		_ASM_EXTABLE(2b, 4b)
 
 		: [parent_replaced] "=r" (parent), [old] "=r" (old),
 		  [faulted] "=r" (faulted)
-- 
cgit 


From f47a454db9129d2e61b224a40f4365cdd4f83042 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 10 Feb 2009 11:53:23 -0500
Subject: tracing, x86: fix constraint for parent variable

The constraint used for retrieving and restoring the parent function
pointer is incorrect. The parent variable is a pointer, and the asm
statement modifies the memory it points to, not the pointer itself.
It is incorrect to pass it in as an output constraint since the asm
will never update the pointer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 9d549e4fe88..231bdd3c5b1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -488,8 +488,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	 * ignore such a protection.
 	 */
 	asm volatile(
-		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
-		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
+		"1: " _ASM_MOV " (%[parent]), %[old]\n"
+		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
 		"   movl $0, %[faulted]\n"
 		"3:\n"
 
@@ -501,9 +501,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		_ASM_EXTABLE(1b, 4b)
 		_ASM_EXTABLE(2b, 4b)
 
-		: [parent_replaced] "=r" (parent), [old] "=r" (old),
-		  [faulted] "=r" (faulted)
-		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+		: [old] "=r" (old), [faulted] "=r" (faulted)
+		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 		: "memory"
 	);
 
-- 
cgit 


From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 11 Feb 2009 15:10:27 +0100
Subject: x86, ptrace, mm: fix double-free on race

Ptrace_detach() races with __ptrace_unlink() if the traced task is
reaped while detaching. This might cause a double-free of the BTS
buffer.

Change the ptrace_detach() path to only do the memory accounting in
ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace()
which will be called from __ptrace_unlink().

The fix follows a proposal from Oleg Nesterov.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb..5a4c23d8989 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
 
 static void ptrace_bts_detach(struct task_struct *child)
 {
-	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-
-		ptrace_bts_free_buffer(child);
-	}
+	/*
+	 * Ptrace_detach() races with ptrace_untrace() in case
+	 * the child dies and is reaped by another thread.
+	 *
+	 * We only do the memory accounting at this point and
+	 * leave the buffer deallocation and the bts tracer
+	 * release to ptrace_bts_untrace() which will be called
+	 * later on with tasklist_lock held.
+	 */
+	release_locked_buffer(child->bts_buffer, child->bts_size);
 }
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-- 
cgit 


From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 12 Feb 2009 10:02:56 -0800
Subject: x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption

Impact: avoid access to percpu vars in preemptible context

They are intended to be used whenever there's the possibility
that there's some stale state which is going to be overwritten
with a queued update, or to force a state change when we may be
in lazy mode.  Either way, we could end up calling it with
preemption enabled, so wrap the functions in their own little
preempt-disable section so they can be safely called in any
context (though preemption should never be enabled if we're actually
in a lazy state).

(Move out of line to avoid #include dependencies.)

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb60887..dcba6c567a2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,30 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
+void arch_flush_lazy_mmu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		arch_leave_lazy_mmu_mode();
+		arch_enter_lazy_mmu_mode();
+	}
+
+	preempt_enable();
+}
+
+void arch_flush_lazy_cpu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+		arch_leave_lazy_cpu_mode();
+		arch_enter_lazy_cpu_mode();
+	}
+
+	preempt_enable();
+}
+
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
-- 
cgit 


From 34b0900d323122113683685b200aae9f9b75e63b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 12 Feb 2009 21:30:48 +0100
Subject: x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context

Impact: Catch cases where lazy MMU state is active in a preemptible context

arch_flush_lazy_mmu_cpu() has been changed to disable preemption so
the checks in enter/leave will never trigger. Put the preemptible()
check into arch_flush_lazy_mmu_cpu() to catch such cases.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index dcba6c567a2..c6520a4e85d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -273,6 +273,7 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_mmu_mode();
 		arch_enter_lazy_mmu_mode();
 	}
@@ -285,6 +286,7 @@ void arch_flush_lazy_cpu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_cpu_mode();
 		arch_enter_lazy_cpu_mode();
 	}
-- 
cgit 


From b13e24644c138d0ddbc451403c30a96b09bfd556 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Thu, 12 Feb 2009 18:48:53 -0800
Subject: x86, hpet: fix for LS21 + HPET = boot hang

Between 2.6.23 and 2.6.24-rc1 a change was made that broke IBM LS21
systems that had the HPET enabled in the BIOS, resulting in boot hangs
for x86_64.

Specifically commit b8ce33590687888ebb900d09557b8807c4539022, which
merges the i386 and x86_64 HPET code.

Prior to this commit, when we setup the HPET timers in x86_64, we did
the following:

	hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
                    HPET_TN_32BIT, HPET_T0_CFG);

However after the i386/x86_64 HPET merge, we do the following:

	cfg = hpet_readl(HPET_Tn_CFG(timer));
	cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
			HPET_TN_SETVAL | HPET_TN_32BIT;
	hpet_writel(cfg, HPET_Tn_CFG(timer));

However on LS21s with HPET enabled in the BIOS, the HPET_T0_CFG register
boots with Level triggered interrupts (HPET_TN_LEVEL) enabled. This
causes the periodic interrupt to be not so periodic, and that results in
the boot time hang I reported earlier in the delay calibration.

My fix: Always disable HPET_TN_LEVEL when setting up periodic mode.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 64d5ad0b8ad..5c8da2c2c18 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
 		cfg = hpet_readl(HPET_Tn_CFG(timer));
+		/* Make sure we use edge triggered interrupts */
+		cfg &= ~HPET_TN_LEVEL;
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
 		hpet_writel(cfg, HPET_Tn_CFG(timer));
-- 
cgit 


From e49590b6dd356f8ef10ba3531a29e5086f6f2e3a Mon Sep 17 00:00:00 2001
From: Chris Ball <cjb@laptop.org>
Date: Fri, 13 Feb 2009 20:56:18 -0500
Subject: x86, olpc: fix model detection without OFW

Impact: fix "garbled display, laptop is unusable" bug

Commit e51a1ac2dfca9ad869471e88f828281db7e810c0 ("x86, olpc: fix endian
bug in openfirmware workaround") breaks model comparison on OLPC; the value
0xc2 needs to be scaled up by olpc_board().

The pre-patch version was wrong, but accidentally worked anyway
(big-endian 0xc2 is big enough to satisfy all other board revisions,
but little endian 0xc2 is not).

Signed-off-by: Chris Ball <cjb@laptop.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Andres Salomon <dilinger@queued.net>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/olpc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1..4006c522adc 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
 static void __init platform_detect(void)
 {
 	/* stopgap until OFW support is added to the kernel */
-	olpc_platform_info.boardrev = 0xc2;
+	olpc_platform_info.boardrev = olpc_board(0xc2);
 }
 #endif
 
-- 
cgit 


From be716615fe596ee117292dc615e95f707fb67fd1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 13 Jan 2009 23:36:34 +0100
Subject: x86, vm86: fix preemption bug

Commit 3d2a71a596bd9c761c8487a2178e95f8a61da083 ("x86, traps: converge
do_debug handlers") changed the preemption disable logic of do_debug()
so handle_vm86_trap() is called with preemption disabled, resulting in:

 BUG: sleeping function called from invalid context at include/linux/kernel.h:155
 in_atomic(): 1, irqs_disabled(): 0, pid: 3005, name: dosemu.bin
 Pid: 3005, comm: dosemu.bin Tainted: G        W  2.6.29-rc1 #51
 Call Trace:
  [<c050d669>] copy_to_user+0x33/0x108
  [<c04181f4>] save_v86_state+0x65/0x149
  [<c0418531>] handle_vm86_trap+0x20/0x8f
  [<c064e345>] do_debug+0x15b/0x1a4
  [<c064df1f>] debug_stack_correct+0x27/0x2c
  [<c040365b>] sysenter_do_call+0x12/0x2f
 BUG: scheduling while atomic: dosemu.bin/3005/0x10000001

Restore the original calling convention and reenable preemption before
calling handle_vm86_trap().

Reported-by: Michal Suchanek <hramrach@centrum.cz>
Cc: stable@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb..a9e7548e179 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
 		local_irq_enable();
 }
 
+static inline void conditional_cli(struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_disable();
+}
+
 static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
 
 #ifdef CONFIG_X86_32
 debug_vm86:
+	/* reenable preemption: handle_vm86_trap() might sleep */
+	dec_preempt_count();
 	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	preempt_conditional_cli(regs);
+	conditional_cli(regs);
 	return;
 #endif
 
-- 
cgit 


From a0abd520fd69295f4a3735e29a9448a32e101d47 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 16 Feb 2009 17:31:58 -0600
Subject: cpumask: fix powernow-k8: partial revert of
 2fdf66b491ac706657946442789ec644cc317e1a

Impact: fix powernow-k8 when acpi=off (or other error).

There was a spurious change introduced into powernow-k8 in this patch:
so that we try to "restore" the cpus_allowed we never saved.  We revert
that file.

See lkml "[PATCH] x86/powernow: fix cpus_allowed brokage when
acpi=off" from Yinghai for the bug report.

Cc: Mike Travis <travis@sgi.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d..6428aa17b40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;
 
-	rc = powernow_k8_cpu_init_acpi(data);
-	if (rc) {
+	if (powernow_k8_cpu_init_acpi(data)) {
 		/*
 		 * Use the PSB BIOS structure. This is only availabe on
 		 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 			       "ACPI maintainers and complain to your BIOS "
 			       "vendor.\n");
 #endif
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		rc = find_psb_table(data);
 		if (rc) {
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		/* Take a crude guess here.
 		 * That guess was in microseconds, so multiply with 1000 */
-- 
cgit 


From bf51935f3e988e0ed6f34b55593e5912f990750a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 17 Feb 2009 06:01:30 -0800
Subject: x86, rcu: fix strange load average and ksoftirqd behavior

Damien Wyart reported high ksoftirqd CPU usage (20%) on an
otherwise idle system.

The function-graph trace Damien provided:

>   799.521187 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521371 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521555 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521738 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521934 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522068 |   1)  ksoftir-2324  |               |                rcu_check_callbacks() {
>   799.522208 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522392 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522575 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522759 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522956 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523074 |   1)  ksoftir-2324  |               |                  rcu_check_callbacks() {
>   799.523214 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523397 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523579 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523762 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523960 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524079 |   1)  ksoftir-2324  |               |                  rcu_check_callbacks() {
>   799.524220 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524403 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524587 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524770 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
> [ . . . ]

Shows rcu_check_callbacks() being invoked way too often. It should be called
once per jiffy, and here it is called no less than 22 times in about
3.5 milliseconds, meaning one call every 160 microseconds or so.

Why do we need to call rcu_pending() and rcu_check_callbacks() from the
idle loop of 32-bit x86, especially given that no other architecture does
this?

The following patch removes the call to rcu_pending() and
rcu_check_callbacks() from the x86 32-bit idle loop in order to
reduce the softirq load on idle systems.

Reported-by: Damien Wyart <damien.wyart@free.fr>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b..bd4da2af08a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -104,9 +104,6 @@ void cpu_idle(void)
 			check_pgt_cache();
 			rmb();
 
-			if (rcu_pending(cpu))
-				rcu_check_callbacks(cpu, 0);
-
 			if (cpu_is_offline(cpu))
 				play_dead();
 
-- 
cgit 


From 6ec68bff3c81e776a455f6aca95c8c5f1d630198 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 12 Feb 2009 13:39:26 +0100
Subject: x86, mce: reinitialize per cpu features on resume

Impact: Bug fix

This fixes a long standing bug in the machine check code. On resume the
boot CPU wouldn't get its vendor specific state like thermal handling
reinitialized. This means the boot cpu wouldn't ever get any thermal
events reported again.

Call the respective initialization functions on resume.

v2: Remove ancient init because they don't have a resume device anyways.
    Pointed out by Thomas Gleixner.
v3: Now fix the Subject too to reflect v2 change

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1c838032fd3..1f184efb6bc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable);
 static int mce_resume(struct sys_device *dev)
 {
 	mce_init(NULL);
+	mce_cpu_features(&current_cpu_data);
 	return 0;
 }
 
-- 
cgit 


From 380851bc6b1b4107c61dfa2997f9095dcf779336 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 12 Feb 2009 13:39:33 +0100
Subject: x86, mce: use force_sig_info to kill process in machine check

Impact: bug fix (with tolerant == 3)

do_exit cannot be called directly from the exception handler because
it can sleep and the exception handler runs on the exception stack.
Use force_sig() instead.

Based on an earlier patch by Ying Huang, who debugged the problem.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1f184efb6bc..25cf624eccb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 		 * If we know that the error was in user space, send a
 		 * SIGBUS.  Otherwise, panic if tolerance is low.
 		 *
-		 * do_exit() takes an awful lot of locks and has a slight
+		 * force_sig() takes an awful lot of locks and has a slight
 		 * risk of deadlocking.
 		 */
 		if (user_space) {
-			do_exit(SIGBUS);
+			force_sig(SIGBUS, current);
 		} else if (panic_on_oops || tolerant < 2) {
 			mce_panic("Uncorrected machine check",
 				&panicm, mcestart);
-- 
cgit 


From 07db1c140eb233971341396e492cc73d4280e698 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 12 Feb 2009 13:39:35 +0100
Subject: x86, mce: fix ifdef for 64bit thermal apic vector clear on shutdown

Impact: Bugfix

The ifdef for the apic clear on shutdown for the 64bit intel thermal
vector was incorrect and never triggered. Fix that.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 115449f869e..570f36e44e5 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -862,7 +862,7 @@ void clear_local_APIC(void)
 	}
 
 	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
 	if (maxlvt >= 5) {
 		v = apic_read(APIC_LVTTHMR);
 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-- 
cgit 


From 48ffc70b675aa7798a52a2e92e20f6cce9140b3d Mon Sep 17 00:00:00 2001
From: Alok N Kataria <akataria@vmware.com>
Date: Wed, 18 Feb 2009 12:33:55 -0800
Subject: x86, vmi: TSC going backwards check in vmi clocksource

Impact: fix time warps under vmware

Similar to the check for TSC going backwards in the TSC clocksource,
we also need this check for VMI clocksource.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: Zachary Amsden <zach@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: stable@kernel.org
---
 arch/x86/kernel/vmiclock_32.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index c4c1f9e0940..bde106cae0a 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -283,10 +283,13 @@ void __devinit vmi_time_ap_init(void)
 #endif
 
 /** vmi clocksource */
+static struct clocksource clocksource_vmi;
 
 static cycle_t read_real_cycles(void)
 {
-	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+	cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+	return ret >= clocksource_vmi.cycle_last ?
+		ret : clocksource_vmi.cycle_last;
 }
 
 static struct clocksource clocksource_vmi = {
-- 
cgit 


From cc3ca22063784076bd240fda87217387a8f2ae92 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 20 Feb 2009 23:35:51 -0800
Subject: x86, mce: remove incorrect __cpuinit for mce_cpu_features()

Impact: Bug fix on UP

Checkin 6ec68bff3c81e776a455f6aca95c8c5f1d630198:
    x86, mce: reinitialize per cpu features on resume

introduced a call to mce_cpu_features() in the resume path, in order
for the MCE machinery to get properly reinitialized after a resume.
However, this function (and its successors) was flagged __cpuinit,
which becomes __init on UP configurations (on SMP suspend/resume
requires CPU hotplug and so this would not be seen.)

Remove the offending __cpuinit annotations for mce_cpu_features() and
its successor functions.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c       | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c   | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 25cf624eccb..fe79985ce0f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 
 }
 
-static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
+static void mce_cpu_features(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094..f2ee0ae29bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr)
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
-void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
+void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
 	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd3..f44c3662436 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -30,7 +30,7 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_exit();
 }
 
-static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
+static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int tm2 = 0;
@@ -84,7 +84,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
 	return;
 }
 
-void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c)
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 }
-- 
cgit 


From e6bd6760c92dc8475c79c4c4a8a16ac313c0b93d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sun, 15 Feb 2009 22:45:49 +0100
Subject: x86_64: acpi/wakeup_64 cleanup

- remove %ds re-set, it's already set in wakeup_long64
- remove double labels and alignment (ENTRY already adds both)
- use meaningful resume point labelname
- skip alignment while jumping from wakeup_long64 to the resume point
- remove .size, .type and unused labels
[v2]
- added ENDPROCs

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/wakeup_64.S | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index bcc293423a7..b5dee6a0de3 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -13,7 +13,6 @@
 	 * Hooray, we are in Long 64-bit mode (but still running in low memory)
 	 */
 ENTRY(wakeup_long64)
-wakeup_long64:
 	movq	saved_magic, %rax
 	movq	$0x123456789abcdef0, %rdx
 	cmpq	%rdx, %rax
@@ -34,16 +33,12 @@ wakeup_long64:
 
 	movq	saved_rip, %rax
 	jmp	*%rax
+ENDPROC(wakeup_long64)
 
 bogus_64_magic:
 	jmp	bogus_64_magic
 
-	.align 2
-	.p2align 4,,15
-.globl do_suspend_lowlevel
-	.type	do_suspend_lowlevel,@function
-do_suspend_lowlevel:
-.LFB5:
+ENTRY(do_suspend_lowlevel)
 	subq	$8, %rsp
 	xorl	%eax, %eax
 	call	save_processor_state
@@ -67,7 +62,7 @@ do_suspend_lowlevel:
 	pushfq
 	popq	pt_regs_flags(%rax)
 
-	movq	$.L97, saved_rip(%rip)
+	movq	$resume_point, saved_rip(%rip)
 
 	movq	%rsp, saved_rsp
 	movq	%rbp, saved_rbp
@@ -79,13 +74,9 @@ do_suspend_lowlevel:
 	movl	$3, %edi
 	xorl	%eax, %eax
 	jmp	acpi_enter_sleep_state
-.L97:
-	.p2align 4,,7
-.L99:
-	.align 4
-	movl	$24, %eax
-	movw	%ax, %ds
 
+	.align 4
+resume_point:
 	/* We don't restore %rax, it must be 0 anyway */
 	movq	$saved_context, %rax
 	movq	saved_context_cr4(%rax), %rbx
@@ -117,12 +108,9 @@ do_suspend_lowlevel:
 	xorl	%eax, %eax
 	addq	$8, %rsp
 	jmp	restore_processor_state
-.LFE5:
-.Lfe5:
-	.size	do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel
-	
+ENDPROC(do_suspend_lowlevel)
+
 .data
-ALIGN
 ENTRY(saved_rbp)	.quad	0
 ENTRY(saved_rsi)	.quad	0
 ENTRY(saved_rdi)	.quad	0
-- 
cgit 


From 6defa2fe2019f3729933516fba5cfd75eecd07de Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sun, 15 Feb 2009 22:46:45 +0100
Subject: x86_64: Fix S3 fail path

As acpi_enter_sleep_state can fail, take this into account in
do_suspend_lowlevel and don't return to the do_suspend_lowlevel's
caller. This would break (currently) fpu status and preempt count.

Technically, this means use `call' instead of `jmp' and `jmp' to
the `resume_point' after the `call' (i.e. if
acpi_enter_sleep_state returns=fails). `resume_point' will handle
the restore of fpu and preempt count gracefully.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/wakeup_64.S | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index b5dee6a0de3..96258d9dc97 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -73,7 +73,9 @@ ENTRY(do_suspend_lowlevel)
 	addq	$8, %rsp
 	movl	$3, %edi
 	xorl	%eax, %eax
-	jmp	acpi_enter_sleep_state
+	call	acpi_enter_sleep_state
+	/* in case something went wrong, restore the machine status and go on */
+	jmp	resume_point
 
 	.align 4
 resume_point:
-- 
cgit 


From 936577c61d0c10b8929608a92c98d839b22053bc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 22 Feb 2009 10:27:49 -0800
Subject: x86: Add IRQF_TIMER to legacy x86 timer interrupt descriptors

Right now nobody cares, but the suspend/resume code will eventually want
to suspend device interrupts without suspending the timer, and will
depend on this flag to know.

The modern x86 timer infrastructure uses the local APIC timers and never
shows up as a device interrupt at all, so it isn't affected and doesn't
need any of this.

Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/time_64.c     | 2 +-
 arch/x86/kernel/vmiclock_32.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e6e695acd72..241ec3923f6 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void)
 
 static struct irqaction irq0 = {
 	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
+	.flags		= IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
 	.mask		= CPU_MASK_NONE,
 	.name		= "timer"
 };
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index bde106cae0a..e5b088fffa4 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -202,7 +202,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
 static struct irqaction vmi_clock_action  = {
 	.name 		= "vmi-timer",
 	.handler 	= vmi_timer_interrupt,
-	.flags 		= IRQF_DISABLED | IRQF_NOBALANCING,
+	.flags 		= IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
 	.mask 		= CPU_MASK_ALL,
 };
 
-- 
cgit 


From 770824bdc421ff58a64db608294323571c949f4c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 22 Feb 2009 18:38:50 +0100
Subject: PM: Split up sysdev_[suspend|resume] from device_power_[down|up]

Move the sysdev_suspend/resume from the callee to the callers, with
no real change in semantics, so that we can rework the disabling of
interrupts during suspend/hibernation.

This is based on an earlier patch from Linus.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/apm_32.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 98807bb095a..266ec6c18b6 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1192,6 +1192,7 @@ static int suspend(int vetoable)
 	device_suspend(PMSG_SUSPEND);
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
+	sysdev_suspend(PMSG_SUSPEND);
 
 	local_irq_enable();
 
@@ -1208,6 +1209,7 @@ static int suspend(int vetoable)
 	if (err != APM_SUCCESS)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
+	sysdev_resume();
 	device_power_up(PMSG_RESUME);
 	local_irq_enable();
 	device_resume(PMSG_RESUME);
@@ -1228,6 +1230,7 @@ static void standby(void)
 
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
+	sysdev_suspend(PMSG_SUSPEND);
 	local_irq_enable();
 
 	err = set_system_power_state(APM_STATE_STANDBY);
@@ -1235,6 +1238,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
+	sysdev_resume();
 	device_power_up(PMSG_RESUME);
 	local_irq_enable();
 }
-- 
cgit