From 7760ec77ab2a9e48bdd0d13341446a8a51f0b9f1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:10:13 +0530 Subject: x86: smp.h remove obsolete function declaration Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 830b9fcb642..83a4cc07431 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,7 +22,6 @@ extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_callin_map; -extern void (*mtrr_hook)(void); extern void zap_low_mappings(void); extern int __cpuinit get_local_pda(int cpu); -- cgit From dacf7333571d770366bff74d10b56aa545434605 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 17:26:35 +0530 Subject: x86: smp.h move zap_low_mappings declartion to tlbflush.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/tlbflush.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 83a4cc07431..64c9e848f13 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,8 +22,6 @@ extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_callin_map; -extern void zap_low_mappings(void); - extern int __cpuinit get_local_pda(int cpu); extern int smp_num_siblings; diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb54911..aed0b700b83 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -175,4 +175,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } +extern void zap_low_mappings(void); + #endif /* _ASM_X86_TLBFLUSH_H */ -- cgit From 6e5385d44b2df05e50a8d07ba0e14d3e32685237 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:11:35 +0530 Subject: x86: smp.h move prefill_possible_map declartion to cpu.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 10 ++++++++++ arch/x86/include/asm/smp.h | 6 ------ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bae482df603..29aa6d0752b 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,16 @@ #include #include +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#endif /* CONFIG_SMP */ + struct x86_cpu { struct cpu cpu; }; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 64c9e848f13..62bd3f68269 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -138,8 +138,6 @@ void play_dead_common(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) @@ -148,10 +146,6 @@ static inline int num_booting_cpus(void) { return cpus_weight(cpu_callout_map); } -#else -static inline void prefill_possible_map(void) -{ -} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; -- cgit From f472cdba849cc3d838f3788469316e8572463a8c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:34:25 +0530 Subject: x86: smp.h move stack_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 2 ++ arch/x86/include/asm/smp.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 29aa6d0752b..f958e7e49c0 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,8 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define stack_smp_processor_id() 0 + #endif /* CONFIG_SMP */ struct x86_cpu { diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 62bd3f68269..ed4af9a89cf 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,7 +173,6 @@ extern int safe_smp_processor_id(void); #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC -- cgit From 96b89dc6598a50e3aac8e2c6d826ae3795b7d030 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:35:48 +0530 Subject: x86: smp.h move safe_smp_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/smp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index f958e7e49c0..4c16888ffa3 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ed4af9a89cf..c92b93594ab 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -172,7 +172,6 @@ extern int safe_smp_processor_id(void); #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ #define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define safe_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC -- cgit From af8968abf09fe5984bdd206e54e0eeb1dc1fa29c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:37:33 +0530 Subject: x86: smp.h move cpu_physical_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/smp.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 4c16888ffa3..89edafb9339 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c92b93594ab..c975b6f83c6 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -170,8 +170,6 @@ extern int safe_smp_processor_id(void); }) #define safe_smp_processor_id() smp_processor_id() -#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ -#define cpu_physical_id(cpu) boot_cpu_physical_apicid #endif #ifdef CONFIG_X86_LOCAL_APIC -- cgit From 6d652ea1d056390a0c33db92b44ed219284b71af Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:38:59 +0530 Subject: x86: smp.h move boot_cpu_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 7 +++++++ arch/x86/include/asm/smp.h | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 89edafb9339..f03b23e3286 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -31,4 +31,11 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c975b6f83c6..74ad9ef6ae0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -212,11 +212,5 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ -- cgit From 41401db698cbb5d1869776bf336881db267e7d19 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:42:46 +0530 Subject: x86: rename intel_mp_floating to mpf_intel Impact: cleanup, solve 80 columns wrap problems intel_mp_floating should be renamed to mpf_intel. The reason: the 'f' in MPF already means 'floating' which means MP Floating pointer structure - no need to repeat that in the type name. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec_def.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 59568bc4767..187dc920193 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -24,7 +24,8 @@ # endif #endif -struct intel_mp_floating { +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { char mpf_signature[4]; /* "_MP_" */ unsigned int mpf_physptr; /* Configuration table address */ unsigned char mpf_length; /* Our length (paragraphs) */ -- cgit From 1eb1b3b65dc3e3ffcc6a60e115c085c0c11c1077 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:43:26 +0530 Subject: x86: rename all fields of mpf_intel mpf_X to X Impact: cleanup, solve 80 columns wrap problems It would be cleaner to rename all the mpf->mpf_X fields to mpf->X - that alone would give 4 characters per usage site. (we already know that it's an 'mpf' entity - no need to duplicate that in the field too) Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec_def.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 187dc920193..4a7f96d7c18 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -26,16 +26,16 @@ /* Intel MP Floating Pointer Structure */ struct mpf_intel { - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ }; #define MPC_SIGNATURE "PCMP" -- cgit From 79f3b3cb7a2586b319a43a7f29924c6c555e4357 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Thu, 8 Jan 2009 10:04:30 -0500 Subject: x86, mtrr: fix types used in userspace exported header Commit 932d27a7913fc6b3c64c6e6082628b0a1561dec9 exported some mtrr structures without using the exportable __uX types, causing userspace build failures. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index cb988aab716..14080d22edb 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -58,15 +58,15 @@ struct mtrr_gentry { #endif /* !__i386__ */ struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; + __u32 base_lo; + __u32 base_hi; + __u32 mask_lo; + __u32 mask_hi; }; /* In the Intel processor's MTRR interface, the MTRR type is always held in an 8 bit field: */ -typedef u8 mtrr_type; +typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 -- cgit From 068790334cececc3d2d945617ccc585477da2e38 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:17:37 +0530 Subject: x86: smp.h move cpu_callin_mask and cpu_callin_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 19 +++++++++++++++++++ arch/x86/include/asm/smp.h | 3 --- 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/cpumask.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 00000000000..308dddd5632 --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include + +#ifdef CONFIG_X86_64 + +extern cpumask_var_t cpu_callin_mask; + +#else /* CONFIG_X86_32 */ + +extern cpumask_t cpu_callin_map; + +#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) + +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1963e27673c..c35aa5c0dd1 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -20,19 +20,16 @@ #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) -- cgit From fb8fd077fbf0de6662acfd240e8e6b25cf3202ca Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:20:24 +0530 Subject: x86: smp.h move cpu_callout_mask and cpu_callout_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 308dddd5632..9933fcad3c8 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -6,12 +6,15 @@ #ifdef CONFIG_X86_64 extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; +extern cpumask_t cpu_callout_map; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c35aa5c0dd1..a3afec5cad0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,20 +17,18 @@ #endif #include #include +#include #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) -- cgit From 493f6ca54e1ea59732dd334e35c5fe2d8e440b06 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:48:22 +0530 Subject: x86: smp.h move cpu_initialized_mask and cpu_initialized declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 9933fcad3c8..d4cfd120b84 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -7,14 +7,17 @@ extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a3afec5cad0..7d2a80319e8 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -21,15 +21,12 @@ #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) #endif /* CONFIG_X86_32 */ -- cgit From 52811d8c9beb67da6bc4b770de3c4134376788a1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:58:50 +0530 Subject: x86: smp.h move cpu_sibling_setup_mask and cpu_sibling_setup_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 12 ------------ 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index d4cfd120b84..26c6dad9047 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -8,16 +8,19 @@ extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; +extern cpumask_t cpu_sibling_setup_map; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 7d2a80319e8..a8cea7b0943 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,18 +19,6 @@ #include #include -#ifdef CONFIG_X86_64 - -extern cpumask_var_t cpu_sibling_setup_mask; - -#else /* CONFIG_X86_32 */ - -extern cpumask_t cpu_sibling_setup_map; - -#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) - -#endif /* CONFIG_X86_32 */ - extern int __cpuinit get_local_pda(int cpu); extern int smp_num_siblings; -- cgit From 4595f9620cda8a1e973588e743cf5f8436dd20c6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: x86: change flush_tlb_others to take a const struct cpumask Impact: reduce stack usage, use new cpumask API. This is made a little more tricky by uv_flush_tlb_others which actually alters its argument, for an IPI to be sent to the remaining cpus in the mask. I solve this by allocating a cpumask_var_t for this case and falling back to IPI should this fail. To eliminate temporaries in the caller, all flush_tlb_others implementations now do the this-cpu-elimination step themselves. Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)" which has been there since pre-git and yet f->flush_cpumask is always zero at this point. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/paravirt.h | 8 +++++--- arch/x86/include/asm/tlbflush.h | 8 ++++---- arch/x86/include/asm/uv/uv_bau.h | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..c26c6bf4da0 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -244,7 +244,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */ @@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +static inline void flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb54911..f4e1b550ce6 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { @@ -142,8 +142,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 @@ -166,7 +166,7 @@ static inline void reset_lazy_tlbstate(void) #endif /* SMP */ #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) #endif static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b..74e6393bfdd 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); +extern int uv_flush_tlb_others(struct cpumask *, + struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); -- cgit From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: irq: initialize nr_irqs based on nr_cpu_ids Impact: Reduce memory usage. This is the second half of the changes to make the irq_desc_ptrs be variable sized based on nr_cpu_ids. This is done by adding a new "max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids. This necessitated moving the define of MAX_IO_APICS to a separate file (asm/apicnum.h) so it could be included without the baggage of the other asm/apicdef.h declarations. Signed-off-by: Mike Travis --- arch/x86/include/asm/apicdef.h | 8 ++------ arch/x86/include/asm/apicnum.h | 12 ++++++++++++ arch/x86/include/asm/irq_vectors.h | 16 +++++++++++----- 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/apicnum.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 63134e31e8b..1a6454ef7f6 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,12 +132,8 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif +/* get MAX_IO_APICS */ +#include /* * All x86-64 systems are xAPIC compatible. diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 00000000000..82f613c607c --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index f7ff65032b9..602361ad0e7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -105,6 +105,8 @@ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#include /* need MAX_IO_APICS */ + #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) @@ -112,11 +114,15 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif + +/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ +# define max_nr_irqs(nr_cpus) \ + ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ + (NR_VECTORS + (8 * NR_CPUS)) : \ + (NR_VECTORS + (32 * MAX_IO_APICS))) \ + +# define NR_IRQS max_nr_irqs(NR_CPUS) + #endif #elif defined(CONFIG_X86_VOYAGER) -- cgit From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 11 Jan 2009 09:22:58 -0800 Subject: cpumask, irq: non-x86 build failures Ingo Molnar wrote: > All non-x86 architectures fail to build: > > In file included from /home/mingo/tip/include/linux/random.h:11, > from /home/mingo/tip/include/linux/stackprotector.h:6, > from /home/mingo/tip/init/main.c:17: > /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory Do not include asm/irq_vectors.h in generic code - it's not available on all architectures. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 1a6454ef7f6..63134e31e8b 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,8 +132,12 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -/* get MAX_IO_APICS */ -#include +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif /* * All x86-64 systems are xAPIC compatible. -- cgit From 2bc1379712e74c5b99adaa6db433c14d8841ab4f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 11 Jan 2009 20:34:47 +0530 Subject: x86: fix apic.c build error on latest git Fix this by reintroducing asm/smp.h include in apic.c - later on I will fix this by removing non-smp data from smp.h Also fix the __inquire_remote_apic() prototype/inline. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_wakecpu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index ceb01366014..89897a6a65b 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -24,7 +24,13 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } +#ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ static inline void inquire_remote_apic(int apicid) { -- cgit From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- arch/x86/include/asm/irq_vectors.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 602361ad0e7..a16a2ab2b42 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -115,14 +115,11 @@ # endif #else -/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ -# define max_nr_irqs(nr_cpus) \ - ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ +# define NR_IRQS \ + ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ (NR_VECTORS + (32 * MAX_IO_APICS))) \ -# define NR_IRQS max_nr_irqs(NR_CPUS) - #endif #elif defined(CONFIG_X86_VOYAGER) -- cgit From c8399943bdb70fef78798b97f975506ecc99e039 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Jan 2009 23:01:15 +0100 Subject: x86, generic: mark complex bitops.h inlines as __always_inline Impact: reduce kernel image size Hugh Dickins noticed that older gcc versions when the kernel is built for code size didn't inline some of the bitops. Mark all complex x86 bitops that have more than a single asm statement or two as always inline to avoid this problem. Probably should be done for other architectures too. Ingo then found a better fix that only requires a single line change, but it unfortunately only works on gcc 4.3. On older gccs the original patch still makes a ~0.3% defconfig difference with CONFIG_OPTIMIZE_INLINING=y. With gcc 4.1 and a defconfig like build: 6116998 1138540 883788 8139326 7c323e vmlinux-oi-with-patch 6137043 1138540 883788 8159371 7c808b vmlinux-optimize-inlining ~20k / 0.3% difference. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index e02a359d2aa..02b47a603fc 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -3,6 +3,9 @@ /* * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H @@ -53,7 +56,8 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *addr) +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) return oldbit; } -static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; -- cgit From afc7d20c8429f32f19d47367fdc36eeed2334ec3 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:10 -0800 Subject: x86 PAT: consolidate old memtype new memtype check into a function Impact: cleanup Move the new memtype old memtype allowed check to header so that is can be shared by other users. Subsequent patch uses this in pat.c in remap_pfn_range() code path. No functionality change in this patch. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 83e69f4a37f..06bbcbd66e9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -341,6 +341,25 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +static inline int is_new_memtype_allowed(unsigned long flags, + unsigned long new_flags) +{ + /* + * Certain new memtypes are not allowed with certain + * requested memtype: + * - request is uncached, return cannot be write-back + * - request is write-combine, return cannot be write-back + */ + if ((flags == _PAGE_CACHE_UC_MINUS && + new_flags == _PAGE_CACHE_WB) || + (flags == _PAGE_CACHE_WC && + new_flags == _PAGE_CACHE_WB)) { + return 0; + } + + return 1; +} + #ifndef __ASSEMBLY__ /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING -- cgit From b5ba7e6d1e7e2ac808afd21be1e56dc34caf20e6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:46:17 +0530 Subject: x86: replacing mp_config_ioapic with mpc_ioapic Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7a1f44ac1f1..5a56ae9b505 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -120,14 +120,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_ioapic { - unsigned long mp_apicaddr; - unsigned int mp_apicid; - unsigned char mp_type; - unsigned char mp_apicver; - unsigned char mp_flags; -}; - struct mp_config_intsrc { unsigned int mp_dstapic; unsigned char mp_type; @@ -139,7 +131,7 @@ struct mp_config_intsrc { }; /* I/O APIC entries */ -extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; -- cgit From c2c21745ecba23c74690a124bcd371f83bd71e45 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:47:22 +0530 Subject: x86: replacing mp_config_intsrc with mpc_intsrc Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 5a56ae9b505..08ec793aa04 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -114,22 +114,8 @@ struct IR_IO_APIC_route_entry { extern int nr_ioapics; extern int nr_ioapic_registers[MAX_IO_APICS]; -/* - * MP-BIOS irq configuration table structures: - */ - #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_intsrc { - unsigned int mp_dstapic; - unsigned char mp_type; - unsigned char mp_irqtype; - unsigned short mp_irqflag; - unsigned char mp_srcbus; - unsigned char mp_srcbusirq; - unsigned char mp_dstirq; -}; - /* I/O APIC entries */ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; @@ -137,7 +123,7 @@ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; -- cgit From b665967979d0e990f196e7c4ba88e17c9ed9b781 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 11:54:27 -0800 Subject: x86: make 32bit MAX_HARDIRQS_PER_CPU to be NR_VECTORS Impact: clean up to be same as 64bit 32-bit is using per-cpu vector too, so don't use default NR_IRQS. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq_32.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index cf7954d1405..d4b5d731073 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -19,6 +19,9 @@ typedef struct { DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) -- cgit From 7de6883faad71e3a253d55b9e1a47b89ebce0a31 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: x86: fix pda_to_op() There's no instruction to move a 64bit immediate into memory location. Drop "i". Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2fbfff88df3..cbd3f48a832 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -78,7 +78,7 @@ do { \ case 8: \ asm(op "q %1,%%gs:%c2": \ "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ + "r" ((T__)val), \ "i"(pda_offset(field))); \ break; \ default: \ -- cgit From f10fcd47120e80f66665567dbe17f5071c7aef52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: x86: make early_per_cpu() a lvalue and use it Make early_per_cpu() a lvalue as per_cpu() is and use it where applicable. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 6 +++--- arch/x86/include/asm/topology.h | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba6..df644f3e53e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -195,9 +195,9 @@ do { \ #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ - (early_per_cpu_ptr(_name) ? \ - early_per_cpu_ptr(_name)[_cpu] : \ - per_cpu(_name, _cpu)) + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) #else /* !CONFIG_SMP */ #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab2..87ca3fd86e8 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -102,10 +102,7 @@ static inline int cpu_to_node(int cpu) /* Same function but used if called before per_cpu areas are setup */ static inline int early_cpu_to_node(int cpu) { - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - return per_cpu(x86_cpu_to_node_map, cpu); + return early_per_cpu(x86_cpu_to_node_map, cpu); } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ -- cgit From f32ff5388d86518c0375ccdb330d3b459b9c405e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: load pointer to pda into %gs while brining up a CPU [ Based on original patch from Christoph Lameter and Mike Travis. ] CPU startup code in head_64.S loaded address of a zero page into %gs for temporary use till pda is loaded but address to the actual pda is available at the point. Load the real address directly instead. This will help unifying percpu and pda handling later on. This patch is mostly taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo --- arch/x86/include/asm/trampoline.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 780ba0ab94f..90f06c25221 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -13,6 +13,7 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_BASE 0x6000 -- cgit From c8f3329a0ddd751241e96b4100df7eda14b2cbc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: use static _cpu_pda array _cpu_pda array first uses statically allocated storage in data.init and then switches to allocated bootmem to conserve space. However, after folding pda area into percpu area, _cpu_pda array will be removed completely. Drop the reallocation part to simplify the code for soon-to-follow changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index cbd3f48a832..2d5b49c3248 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* Per processor datastructure. %gs points to it while the kernel runs */ @@ -39,7 +40,7 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda **_cpu_pda; +extern struct x8664_pda *_cpu_pda[NR_CPUS]; extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) -- cgit From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: fold pda into percpu area on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] Currently pdas and percpu areas are allocated separately. %gs points to local pda and percpu area can be reached using pda->data_offset. This patch folds pda into percpu area. Due to strange gcc requirement, pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve pda sized chunk at the start of the percpu area. After this change, for boot cpu, %gs first points to pda in the data.init area and later during setup_per_cpu_areas() gets updated to point to the actual pda. This means that setup_per_cpu_areas() need to reload %gs for CPU0 while clearing pda area for other cpus as cpu0 already has modified it when control reaches setup_per_cpu_areas(). This patch also removes now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 8 ++++++++ arch/x86/include/asm/smp.h | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index df644f3e53e..0ed77cf33f7 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,6 +1,14 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H +#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif +#endif + #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a8cea7b0943..127415402ea 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,8 +19,6 @@ #include #include -extern int __cpuinit get_local_pda(int cpu); - extern int smp_num_siblings; extern unsigned int num_processors; -- cgit From 9939ddaff52787b2a7c1adf1b2afc95421aa0884 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: merge 64 and 32 SMP percpu handling Now that pda is allocated as part of percpu, percpu doesn't need to be accessed through pda. Unify x86_64 SMP percpu access with x86_32 SMP one. Other than the segment register, operand size and the base of percpu symbols, they behave identical now. This patch replaces now unnecessary pda->data_offset with a dummy field which is necessary to keep stack_canary at its place. This patch also moves per_cpu_offset initialization out of init_gdt() into setup_per_cpu_areas(). Note that this change also necessitates explicit per_cpu_offset initializations in voyager_smp.c. With this change, x86_OP_percpu()'s are as efficient on x86_64 as on x86_32 and also x86_64 can use assembly PER_CPU macros. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 3 +- arch/x86/include/asm/percpu.h | 127 +++++++++++++----------------------------- 2 files changed, 40 insertions(+), 90 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2d5b49c3248..e91558e3785 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,7 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ + unsigned long dummy; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0ed77cf33f7..556f84b9ea9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,62 +1,13 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_64 -extern void load_pda_offset(int cpu); +#define __percpu_seg gs +#define __percpu_mov_op movq #else -static inline void load_pda_offset(int cpu) { } -#endif -#endif - -#ifdef CONFIG_X86_64 -#include - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - +#define __percpu_seg fs +#define __percpu_mov_op movl #endif -#include - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -73,42 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda); * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ +#define PER_CPU(var, reg) \ + __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var +#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + __percpu_mov_op $per_cpu__##var, reg #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ -/* - * PER_CPU finds an address of a per-cpu variable. - * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) - */ -#ifdef CONFIG_SMP - -#define __my_cpu_offset x86_read_percpu(this_cpu_off) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" +#include -#else /* !SMP */ - -#define __percpu_seg "" - -#endif /* SMP */ +#ifdef CONFIG_SMP +#define __percpu_seg_str "%%"__stringify(__percpu_seg)":" +#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#else +#define __percpu_seg_str +#endif #include @@ -128,20 +63,25 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ + asm(op "b %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ + asm(op "w %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ + asm(op "l %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_seg_str"%0" \ + : "+m" (var) \ + : "r" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -151,17 +91,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ + asm(op "b "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ + asm(op "w "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ + asm(op "l "__percpu_seg_str"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -175,8 +120,14 @@ do { \ #define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) + +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif + #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP -- cgit From b12d8db8fbfaed1e8222a15333a3645599636854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: make pda a percpu variable [ Based on original patch from Christoph Lameter and Mike Travis. ] As pda is now allocated in percpu area, it can easily be made a proper percpu variable. Make it so by defining per cpu symbol from linker script and declaring it in C code for SMP and simply defining it for UP. This change cleans up code and brings SMP and UP closer a bit. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e91558e3785..66ae1043393 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -7,6 +7,7 @@ #include #include #include +#include /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { @@ -39,10 +40,10 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[NR_CPUS]; +DECLARE_PER_CPU(struct x8664_pda, __pda); extern void pda_init(int); -#define cpu_pda(i) (_cpu_pda[i]) +#define cpu_pda(cpu) (&per_cpu(__pda, cpu)) /* * There is no fast way to get the base address of the PDA, all the accesses -- cgit From 49357d19e4fb31e28796eaff83499e7584c26878 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: convert pda ops to wrappers around x86 percpu accessors pda is now a percpu variable and there's no reason it can't use plain x86 percpu accessors. Add x86_test_and_clear_bit_percpu() and replace pda op implementations with wrappers around x86 percpu accessors. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 88 +++---------------------------------------- arch/x86/include/asm/percpu.h | 10 +++++ 2 files changed, 16 insertions(+), 82 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 66ae1043393..e3d3a081d79 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,91 +45,15 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ -extern void __bad_pda_field(void) __attribute__((noreturn)); - -/* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. - */ -extern struct x8664_pda _proxy_pda; - -#define pda_offset(field) offsetof(struct x8664_pda, field) - -#define pda_to_op(op, field, val) \ -do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "r" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ -} while (0) - -#define pda_from_op(op, field) \ -({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ -}) - -#define read_pda(field) pda_from_op("mov", field) -#define write_pda(field, val) pda_to_op("mov", field, val) -#define add_pda(field, val) pda_to_op("add", field, val) -#define sub_pda(field, val) pda_to_op("sub", field, val) -#define or_pda(field, val) pda_to_op("or", field, val) +#define read_pda(field) x86_read_percpu(__pda.field) +#define write_pda(field, val) x86_write_percpu(__pda.field, val) +#define add_pda(field, val) x86_add_percpu(__pda.field, val) +#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) +#define or_pda(field, val) x86_or_percpu(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ -({ \ - int old__; \ - asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ - : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ - old__; \ -}) + x86_test_and_clear_bit_percpu(bit, __pda.field) #endif diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 556f84b9ea9..328b31a429d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -121,6 +121,16 @@ do { \ #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define x86_test_and_clear_bit_percpu(bit, var) \ +({ \ + int old__; \ + asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \ + : "=r" (old__) \ + : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \ + old__; \ +}) + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else -- cgit From 004aa322f855a765741d9437a98dd8fe2e4f32a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: misc clean up after the percpu update Do the following cleanups: * kill x86_64_init_pda() which now is equivalent to pda_init() * use per_cpu_offset() instead of cpu_pda() when initializing initial_gs Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/setup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a..536949749bc 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); -- cgit From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 22:15:53 +0900 Subject: percpu: add optimized generic percpu accessors It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/irq_regs_32.h | 4 ++-- arch/x86/include/asm/mmu_context_32.h | 12 ++++++------ arch/x86/include/asm/pda.h | 10 +++++----- arch/x86/include/asm/percpu.h | 24 +++++++++++++----------- arch/x86/include/asm/smp.h | 2 +- 6 files changed, 28 insertions(+), 26 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d67..0728480f5c5 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -10,7 +10,7 @@ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return x86_read_percpu(current_task); + return percpu_read(current_task); } #else /* X86_32 */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index 86afd747345..d7ed33ee94e 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h @@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return x86_read_percpu(irq_regs); + return percpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); + percpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 7e98ce1d2c0..08b53454f83 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h @@ -4,8 +4,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - x86_write_percpu(cpu_tlbstate.active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); @@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, } #ifdef CONFIG_SMP else { - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e3d3a081d79..47f274fe695 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,11 +45,11 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -#define read_pda(field) x86_read_percpu(__pda.field) -#define write_pda(field, val) x86_write_percpu(__pda.field, val) -#define add_pda(field, val) x86_add_percpu(__pda.field, val) -#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) -#define or_pda(field, val) x86_or_percpu(__pda.field, val) +#define read_pda(field) percpu_read(__pda.field) +#define write_pda(field, val) percpu_write(__pda.field, val) +#define add_pda(field, val) percpu_add(__pda.field, val) +#define sub_pda(field, val) percpu_sub(__pda.field, val) +#define or_pda(field, val) percpu_or(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 328b31a429d..03aa4b00a1c 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -40,16 +40,11 @@ #ifdef CONFIG_SMP #define __percpu_seg_str "%%"__stringify(__percpu_seg)":" -#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#define __my_cpu_offset percpu_read(this_cpu_off) #else #define __percpu_seg_str #endif -#include - -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); - /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); @@ -115,11 +110,13 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ @@ -131,6 +128,11 @@ do { \ old__; \ }) +#include + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 127415402ea..c7bbbbe65d3 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -- cgit From 1b437c8c73a36daa471dd54a63c426d72af5723d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++----- arch/x86/include/asm/pda.h | 10 ---------- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index b5a6b5d5670..a65bab20f6c 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,22 +3,36 @@ #include #include -#include #include +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT 1 -#define inc_irq_stat(member) add_pda(member, 1) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() read_pda(__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 47f274fe695..69a40757e21 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,19 +25,9 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; short isidle; struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); -- cgit From 9eb912d1aa6b8106e06a73ea6702ec3dab0d6a1a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: x86-64: Move TLB state from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/mmu_context_64.h | 16 +++++++--------- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/tlbflush.h | 7 ++----- 3 files changed, 9 insertions(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h index 677d36e9540..c4572505ab3 100644 --- a/arch/x86/include/asm/mmu_context_64.h +++ b/arch/x86/include/asm/mmu_context_64.h @@ -1,13 +1,11 @@ #ifndef _ASM_X86_MMU_CONTEXT_64_H #define _ASM_X86_MMU_CONTEXT_64_H -#include - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); load_cr3(next->pgd); @@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #ifdef CONFIG_SMP else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 69a40757e21..8ee835ed10e 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,9 +25,7 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - short mmu_state; short isidle; - struct mm_struct *active_mm; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 17feaa9c7e7..d3539f998f8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ -- cgit From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Convert irqstacks to per-cpu Move the irqstackptr variable from the PDA to per-cpu. Make the stacks themselves per-cpu, removing some specific allocation code. Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot adjustments. tj: * sprinkle some underbars around. * irq_stack_ptr is not used till traps_init(), no reason to initialize it early. On SMP, just leaving it NULL till proper initialization in setup_per_cpu_areas() works. Dropped is_boot_cpu and early irq_stack_ptr initialization. * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) instead of (char, irq_stack[IRQ_STACK_SIZE]). Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/page_64.h | 4 ++-- arch/x86/include/asm/pda.h | 1 - arch/x86/include/asm/processor.h | 3 +++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5ebca29f44f..e27fdbe5f9e 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -13,8 +13,8 @@ #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define STACKFAULT_STACK 1 #define DOUBLEFAULT_STACK 2 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 8ee835ed10e..09965f7a216 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -22,7 +22,6 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ short isidle; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..f511246fa6c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,9 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); -- cgit From ea9279066de44053d0c20ea855bc9f4706652d84 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move cpu number from PDA to per-cpu and consolidate with 32-bit. tj: moved cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA for voyager. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/include/asm/smp.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 09965f7a216..668d5a5b6f7 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -16,7 +16,7 @@ struct x8664_pda { unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ + unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c7bbbbe65d3..68636e767a9 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -25,9 +25,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -#ifdef CONFIG_X86_32 DECLARE_PER_CPU(int, cpu_number); -#endif static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata; extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() read_pda(cpunumber) +#define raw_smp_processor_id() (percpu_read(cpu_number)) #define stack_smp_processor_id() \ ({ \ -- cgit From c6f5e0acd5d12ee23f701f15889872e67b47caa6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move current task from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 24 +++--------------------- arch/x86/include/asm/pda.h | 4 ++-- arch/x86/include/asm/system.h | 4 ++-- 3 files changed, 7 insertions(+), 25 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0728480f5c5..c68c361697e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include #include +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return percpu_read(current_task); -} - -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include - -struct task_struct; static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return percpu_read(current_task); } -#else /* __ASSEMBLY__ */ - -#include -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 668d5a5b6f7..7209302d922 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,8 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long dummy; + unsigned long unused1; + unsigned long unused2; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1..4399aac680e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -106,7 +106,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ : "memory", "cc" __EXTRA_CLOBBER) #endif -- cgit From 9af45651f1f7c89942e016a1a00a7ebddfa727f8 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move kernelstack from PDA to per-cpu. Also clean up PER_CPU_VAR usage in xen-asm_64.S tj: * remove now unused stack_thread_info() * s/kernelstack/kernel_stack/ * added FIXME comment in xen-asm_64.S Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 4 +--- arch/x86/include/asm/thread_info.h | 20 ++++++++------------ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 7209302d922..4d28ffba6e1 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -13,7 +13,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; - unsigned long kernelstack; /* 16 top of kernel stack for current */ + unsigned long unused3; unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ @@ -44,6 +44,4 @@ extern void pda_init(int); #endif -#define PDA_STACKOFFSET (5*8) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa..b46f8ca007b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include +#include +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} +DECLARE_PER_CPU(unsigned long, kernel_stack); -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) +static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif -- cgit From 3d1e42a7cf945e289d6ba26159aa0e2b0645401b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move oldrsp from PDA to per-cpu. tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda is still referenced in the file * s/oldrsp/old_rsp/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4d28ffba6e1..ae23deb9955 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -14,7 +14,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; unsigned long unused3; - unsigned long oldrsp; /* 24 user rsp for system call */ + unsigned long unused4; int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR -- cgit From 5689553076c4a67b83426b076082c63085b7567a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move irqcount from PDA to per-cpu. tj: s/irqcount/irq_count/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index ae23deb9955..4527d70314d 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -15,7 +15,7 @@ struct x8664_pda { unsigned long unused2; unsigned long unused3; unsigned long unused4; - int irqcount; /* 32 Irq nesting counter. Starts -1 */ + int unused5; unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ -- cgit From e7a22c1ebcc1caa8178df1819d05128bb5b45ab9 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: x86-64: Move nodenumber from PDA to per-cpu. tj: * s/nodenumber/node_number/ * removed now unused pda variable from pda_init() Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/include/asm/topology.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4527d70314d..b30ef6bddc4 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -22,7 +22,6 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ short isidle; } ____cacheline_aligned_in_smp; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 87ca3fd86e8..ffea1fe03a9 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map; DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); /* Returns the number of the current Node. */ -#define numa_node_id() read_pda(nodenumber) +DECLARE_PER_CPU(int, node_number); +#define numa_node_id() percpu_read(node_number) #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); -- cgit From c2558e0eba66b49993e619da66c95a50a97830a3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: x86-64: Move isidle from PDA to per-cpu. tj: s/isidle/is_idle/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index b30ef6bddc4..c31ca048a90 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -23,7 +23,6 @@ struct x8664_pda { offset 40!!! */ #endif short in_bootmem; /* pda lives in bootmem */ - short isidle; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); -- cgit From 87b264065880fa696c121dad8498a60524e0f6de Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: x86-64: Use absolute displacements for per-cpu accesses. Accessing memory through %gs should not use rip-relative addressing. Adding a P prefix for the argument tells gcc to not add (%rip) to the memory references. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 26 +++++++++++++------------- arch/x86/include/asm/system.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 03aa4b00a1c..165d5272ece 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -39,10 +39,10 @@ #include #ifdef CONFIG_SMP -#define __percpu_seg_str "%%"__stringify(__percpu_seg)":" +#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) #else -#define __percpu_seg_str +#define __percpu_arg(x) "%" #x #endif /* For arch-specific code, we can use direct single-insn ops (they @@ -58,22 +58,22 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg_str"%0" \ + asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg_str"%0" \ + asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg_str"%0" \ + asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 8: \ - asm(op "q %1,"__percpu_seg_str"%0" \ + asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ : "r" ((T__)val)); \ break; \ @@ -86,22 +86,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg_str"%1,%0" \ + asm(op "b "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg_str"%1,%0" \ + asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg_str"%1,%0" \ + asm(op "l "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_seg_str"%1,%0" \ + asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -122,9 +122,9 @@ do { \ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ int old__; \ - asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \ - : "=r" (old__) \ - : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (per_cpu__##var) \ + : "dIr" (bit)); \ old__; \ }) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 4399aac680e..d1dc27dba36 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ -- cgit