diff options
105 files changed, 1135 insertions, 1248 deletions
@@ -16,6 +16,13 @@ NAME = Man-Eating Seals of Antiquity # o print "Entering directory ..."; MAKEFLAGS += -rR --no-print-directory +# Avoid funny character set dependencies +unexport LC_ALL +LC_CTYPE=C +LC_COLLATE=C +LC_NUMERIC=C +export LC_CTYPE LC_COLLATE LC_NUMERIC + # We are using a recursive build, so we need to do a little thinking # to get the ordering right. # diff --git a/arch/Kconfig b/arch/Kconfig index d82875820a1..9d055b4f058 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -135,9 +135,7 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool - depends on HAVE_PERF_EVENTS - select ANON_INODES - select PERF_EVENTS + depends on PERF_EVENTS config HAVE_USER_RETURN_NOTIFIER bool diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 6118890c946..6be4503201a 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -174,7 +174,7 @@ static int fallback_init_cip(struct crypto_tfm *tfm) if (IS_ERR(sctx->fallback.cip)) { pr_err("Allocating AES fallback algorithm %s failed\n", name); - return PTR_ERR(sctx->fallback.blk); + return PTR_ERR(sctx->fallback.cip); } return 0; diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 77df726180b..2b92d501425 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -164,7 +164,7 @@ static inline void part_hdr__part_name(enum diag204_format type, void *hdr, LPAR_NAME_LEN); EBCASC(name, LPAR_NAME_LEN); name[LPAR_NAME_LEN] = 0; - strstrip(name); + strim(name); } struct cpu_info { @@ -523,7 +523,7 @@ static int diag224_idx2name(int index, char *name) memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN), CPU_NAME_LEN); name[CPU_NAME_LEN] = 0; - strstrip(name); + strim(name); return 0; } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index d01fc8f799f..f0b0d31f0b4 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -124,7 +124,7 @@ static int hpyfs_vm_create_guest(struct super_block *sb, /* guest dir */ memcpy(guest_name, data->guest_name, NAME_LEN); EBCASC(guest_name, NAME_LEN); - strstrip(guest_name); + strim(guest_name); guest_dir = hypfs_mkdir(sb, systems_dir, guest_name); if (IS_ERR(guest_dir)) return PTR_ERR(guest_dir); diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index cb5232df151..192a7203a14 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -269,7 +269,8 @@ #define __NR_pwritev 329 #define __NR_rt_tgsigqueueinfo 330 #define __NR_perf_event_open 331 -#define NR_syscalls 332 +#define __NR_recvmmsg 332 +#define NR_syscalls 333 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 30de2d0e52b..faeaccc7d7d 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1853,3 +1853,12 @@ sys32_execve_wrapper: llgtr %r3,%r3 # compat_uptr_t * llgtr %r4,%r4 # compat_uptr_t * jg sys32_execve # branch to system call + + .globl compat_sys_recvmmsg_wrapper +compat_sys_recvmmsg_wrapper: + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct compat_mmsghdr * + llgfr %r4,%r4 # unsigned int + llgfr %r5,%r5 # unsigned int + llgtr %r6,%r6 # struct compat_timespec * + jg compat_sys_recvmmsg diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 4890ac6d7fa..4d73296fed7 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -221,7 +221,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ const char *buf, size_t len) \ { \ strncpy(_value, buf, sizeof(_value) - 1); \ - strstrip(_value); \ + strim(_value); \ return len; \ } \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ @@ -472,7 +472,7 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, return sprintf(page, "#unknown#\n"); memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); - strstrip(loadparm); + strim(loadparm); return sprintf(page, "%s\n", loadparm); } @@ -776,7 +776,7 @@ static void reipl_get_ascii_loadparm(char *loadparm, memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); loadparm[LOADPARM_LEN] = 0; - strstrip(loadparm); + strim(loadparm); } static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 653c6a17874..13815d39f7d 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -959,7 +959,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_fpregs_set, }, [REGSET_GENERAL_EXTENDED] = { - .core_note_type = NT_PRXSTATUS, + .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 30eca070d42..4f292c93687 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -340,3 +340,4 @@ SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) +SYSCALL(sys_recvmmsg,sys_recvmmsg,compat_sys_recvmmsg_wrapper) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c2e42cc65ce..6e7ad63854c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -18,7 +18,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/timer.h> #include <linux/mm.h> #include <linux/smp.h> @@ -382,7 +382,7 @@ void __kprobes do_single_step(struct pt_regs *regs) SIGTRAP) == NOTIFY_STOP){ return; } - if ((current->ptrace & PT_PTRACED) != 0) + if (tracehook_consider_fatal_signal(current, SIGTRAP)) force_sig(SIGTRAP, current); } @@ -483,7 +483,7 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (current->ptrace & PT_PTRACED) + if (tracehook_consider_fatal_signal(current, SIGTRAP)) force_sig(SIGTRAP, current); else signal = SIGILL; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3b2a5aca4ed..55298e89157 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,8 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_HW_BREAKPOINT + select PERF_EVENTS + select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 613700f27a4..637e1ec963c 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -153,6 +153,7 @@ #define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ +#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ /* * Auxiliary flags: Linux defined - For features scattered in various diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 08c48a81841..eeac829a0f4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -103,7 +103,8 @@ extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); extern void send_cleanup_vector(struct irq_cfg *); struct irq_desc; -extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, + unsigned int *dest_id); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4ffe09b2ad7..1cd58cdbc03 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -12,6 +12,7 @@ #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ #define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ /* EFER bits: */ #define _EFER_SCE 0 /* SYSCALL/SYSRET */ @@ -123,6 +124,7 @@ #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff #define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 2d228fc9b4b..c5bc4c2d33f 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -27,6 +27,18 @@ struct msr { }; }; +struct msr_info { + u32 msr_no; + struct msr reg; + struct msr *msrs; + int err; +}; + +struct msr_regs_info { + u32 *regs; + int err; +}; + static inline unsigned long long native_read_tscp(unsigned int *aux) { unsigned long low, high; @@ -240,9 +252,9 @@ do { \ #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ (u32)((val) >> 32)) -#define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) +#define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) -#define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) +#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) struct msr *msrs_alloc(void); void msrs_free(struct msr *msrs); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6f8ec1c37e0..fc801bab1b3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -181,7 +181,7 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ - asm("cpuid" + asm volatile("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index cf86a5e7381..35e89122a42 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -5,6 +5,29 @@ extern int kstack_depth_to_print; int x86_is_stack_id(int id, char *name); +struct thread_info; +struct stacktrace_ops; + +typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, + unsigned long *stack, + unsigned long bp, + const struct stacktrace_ops *ops, + void *data, + unsigned long *end, + int *graph); + +extern unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + +extern unsigned long +print_context_stack_bp(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + /* Generic stack tracer with callbacks */ struct stacktrace_ops { @@ -14,6 +37,7 @@ struct stacktrace_ops { void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); + walk_stack_t walk_stack; }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index d0c99abc26c..eacbd2b31d2 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -306,10 +306,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } struct apic apic_physflat = { diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 38dcecfa581..cb804c5091b 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -131,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return bigsmp_cpu_to_logical_apicid(cpu); - - return BAD_APICID; + return bigsmp_cpu_to_logical_apicid(cpu); } static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 11a5851f1f5..de00c4619a5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2276,26 +2276,28 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq /* * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and * leaves desc->affinity untouched. */ unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, + unsigned int *dest_id) { struct irq_cfg *cfg; unsigned int irq; if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; + return -1; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; + return -1; cpumask_copy(desc->affinity, mask); - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + return 0; } static int @@ -2311,12 +2313,11 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) cfg = desc->chip_data; spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { + ret = set_desc_affinity(desc, mask, &dest); + if (!ret) { /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); - ret = 0; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -3351,8 +3352,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3384,8 +3384,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) if (get_irte(irq, &irte)) return -1; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; irte.vector = cfg->vector; @@ -3567,8 +3566,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3623,8 +3621,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3730,8 +3727,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irq_cfg *cfg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a5371ec3677..cf69c59f491 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_logical_apicid, cpu); } static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a8989aadc99..8972f38c5ce 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } static unsigned int x2apic_phys_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b684bb303cb..d56b0efb205 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -225,10 +225,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8dc3ea145c9..e485825130d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid) /* * Fixup core topology information for AMD multi-node processors. - * Assumption 1: Number of cores in each internal node is the same. - * Assumption 2: Mixed systems with both single-node and dual-node - * processors are not supported. + * Assumption: Number of cores in each internal node is the same. */ #ifdef CONFIG_X86_HT static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) { -#ifdef CONFIG_PCI - u32 t, cpn; - u8 n, n_id; + unsigned long long value; + u32 nodes, cores_per_node; int cpu = smp_processor_id(); + if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) + return; + /* fixup topology information only once for a core */ if (cpu_has(c, X86_FEATURE_AMD_DCM)) return; - /* check for multi-node processor on boot cpu */ - t = read_pci_config(0, 24, 3, 0xe8); - if (!(t & (1 << 29))) + rdmsrl(MSR_FAM10H_NODE_ID, value); + + nodes = ((value >> 3) & 7) + 1; + if (nodes == 1) return; set_cpu_cap(c, X86_FEATURE_AMD_DCM); + cores_per_node = c->x86_max_cores / nodes; - /* cores per node: each internal node has half the number of cores */ - cpn = c->x86_max_cores >> 1; + /* store NodeID, use llc_shared_map to store sibling info */ + per_cpu(cpu_llc_id, cpu) = value & 7; - /* even-numbered NB_id of this dual-node processor */ - n = c->phys_proc_id << 1; - - /* - * determine internal node id and assign cores fifty-fifty to - * each node of the dual-node processor - */ - t = read_pci_config(0, 24 + n, 3, 0xe8); - n = (t>>30) & 0x3; - if (n == 0) { - if (c->cpu_core_id < cpn) - n_id = 0; - else - n_id = 1; - } else { - if (c->cpu_core_id < cpn) - n_id = 1; - else - n_id = 0; - } - - /* compute entire NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; - - /* fixup core id to be in range from 0 to cpn */ - c->cpu_core_id = c->cpu_core_id % cpn; -#endif + /* fixup core id to be in range from 0 to (cores_per_node - 1) */ + c->cpu_core_id = c->cpu_core_id % cores_per_node; } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9c31e8b09d2..879666f4d87 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -70,7 +70,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); sched_clock_stable = 1; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 45506d5dd8d..c223b7e895d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2336,6 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, + .walk_stack = print_context_stack_bp, }; #include "../dumpstack.h" diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 7ef24a79699..cb27fd6136c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -187,7 +187,8 @@ static int __init cpuid_init(void) int i, err = 0; i = 0; - if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { + if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS, + "cpu/cpuid", &cpuid_fops)) { printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", CPUID_MAJOR); err = -EBUSY; @@ -216,7 +217,7 @@ out_class: } class_destroy(cpuid_class); out_chrdev: - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); out: return err; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0a0aa1cec8f..c56bc287303 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -109,6 +109,30 @@ print_context_stack(struct thread_info *tinfo, } return bp; } +EXPORT_SYMBOL_GPL(print_context_stack); + +unsigned long +print_context_stack_bp(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + unsigned long *ret_addr = &frame->return_address; + + while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + unsigned long addr = *ret_addr; + + if (__kernel_text_address(addr)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + ret_addr = &frame->return_address; + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + } + } + return (unsigned long)frame; +} +EXPORT_SYMBOL_GPL(print_context_stack_bp); static void @@ -141,10 +165,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, + .walk_stack = print_context_stack, }; void diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 81086c227ab..4fd1420faff 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,12 +14,6 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif -extern unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0ed4c7abb6..ae775ca47b2 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -58,7 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index b13af53883a..0ad9597073f 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -188,8 +188,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); + bp = ops->walk_stack(tinfo, stack, bp, ops, + data, estack_end, &graph); ops->stack(data, "<EOE>"); /* * We link to the next stack via the diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index f50447d961c..05ed7ab2ca4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -724,7 +724,7 @@ core_initcall(e820_mark_nvs_memory); /* * Early reserved memory areas. */ -#define MAX_EARLY_RES 20 +#define MAX_EARLY_RES 32 struct early_res { u64 start, end; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 572b07eee3f..4bd93c9b2b2 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -246,7 +246,7 @@ static int __init msr_init(void) int i, err = 0; i = 0; - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { + if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { printk(KERN_ERR "msr: unable to get major %d for msr\n", MSR_MAJOR); err = -EBUSY; @@ -274,7 +274,7 @@ out_class: msr_device_destroy(i); class_destroy(msr_class); out_chrdev: - unregister_chrdev(MSR_MAJOR, "cpu/msr"); + __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); out: return err; } diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index c3eb207181f..922eefbb3f6 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, - .stack = save_stack_stack, - .address = save_stack_address, + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address, + .walk_stack = print_context_stack, }; static const struct stacktrace_ops save_stack_ops_nosched = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, - .stack = save_stack_stack, - .address = save_stack_address_nosched, + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address_nosched, + .walk_stack = print_context_stack, }; /* diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cd982f48e23..597683aa5ba 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; + sched_clock_stable = 0; printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 61d805df4c9..ece73d8e324 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -215,8 +215,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) unsigned long mmr_offset; unsigned mmr_pnode; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; mmr_value = 0; diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 45b20e486c2..cffd754f303 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) := msr.o +obj-$(CONFIG_SMP) += msr-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o @@ -22,7 +22,7 @@ lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_KPROBES) += insn.o inat.o -obj-y += msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c new file mode 100644 index 00000000000..a6b1b86d225 --- /dev/null +++ b/arch/x86/lib/msr-smp.c @@ -0,0 +1,204 @@ +#include <linux/module.h> +#include <linux/preempt.h> +#include <linux/smp.h> +#include <asm/msr.h> + +static void __rdmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + rdmsr(rv->msr_no, reg->l, reg->h); +} + +static void __wrmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + wrmsr(rv->msr_no, reg->l, reg->h); +} + +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err; +} +EXPORT_SYMBOL(rdmsr_on_cpu); + +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + + return err; +} +EXPORT_SYMBOL(wrmsr_on_cpu); + +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) +{ + struct msr_info rv; + int this_cpu; + + memset(&rv, 0, sizeof(rv)); + + rv.msrs = msrs; + rv.msr_no = msr_no; + + this_cpu = get_cpu(); + + if (cpumask_test_cpu(this_cpu, mask)) + msr_func(&rv); + + smp_call_function_many(mask, msr_func, &rv, 1); + put_cpu(); +} + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} +EXPORT_SYMBOL(rdmsr_on_cpus); + +/* + * wrmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); +} +EXPORT_SYMBOL(wrmsr_on_cpus); + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. */ +static void __rdmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); +} + +static void __wrmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); +} + +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_on_cpu); + +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_on_cpu); + +/* + * These variants are significantly slower, but allows control over + * the entire 32-bit GPR set. + */ +static void __rdmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = rdmsr_safe_regs(rv->regs); +} + +static void __wrmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = wrmsr_safe_regs(rv->regs); +} + +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); + +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 87283417793..8f8eebdca7d 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,123 +1,7 @@ #include <linux/module.h> #include <linux/preempt.h> -#include <linux/smp.h> #include <asm/msr.h> -struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; -}; - -static void __rdmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - rdmsr(rv->msr_no, reg->l, reg->h); -} - -static void __wrmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - wrmsr(rv->msr_no, reg->l, reg->h); -} - -int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err; -} -EXPORT_SYMBOL(rdmsr_on_cpu); - -int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); - - return err; -} -EXPORT_SYMBOL(wrmsr_on_cpu); - -static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, - struct msr *msrs, - void (*msr_func) (void *info)) -{ - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - msr_func(&rv); - - smp_call_function_many(mask, msr_func, &rv, 1); - put_cpu(); -} - -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); -} -EXPORT_SYMBOL(rdmsr_on_cpus); - -/* - * wrmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); -} -EXPORT_SYMBOL(wrmsr_on_cpus); - struct msr *msrs_alloc(void) { struct msr *msrs = NULL; @@ -137,100 +21,3 @@ void msrs_free(struct msr *msrs) free_percpu(msrs); } EXPORT_SYMBOL(msrs_free); - -/* These "safe" variants are slower and should be used when the target MSR - may not actually exist. */ -static void __rdmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); -} - -static void __wrmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); -} - -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_on_cpu); - -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_on_cpu); - -/* - * These variants are significantly slower, but allows control over - * the entire 32-bit GPR set. - */ -struct msr_regs_info { - u32 *regs; - int err; -}; - -static void __rdmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = rdmsr_safe_regs(rv->regs); -} - -static void __wrmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = wrmsr_safe_regs(rv->regs); -} - -int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); - -int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6f8aa33031c..9324f13492d 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void) e820_register_active_regions(chunk->nid, chunk->start_pfn, min(chunk->end_pfn, max_pfn)); } + /* for out of order entries in SRAT */ + sort_node_map(); for_each_online_node(nid) { unsigned long start = node_start_pfn[nid]; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d8907548966..a27124185fc 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -317,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) unsigned long s = nodes[i].start >> PAGE_SHIFT; unsigned long e = nodes[i].end >> PAGE_SHIFT; pxmram += e - s; - pxmram -= absent_pages_in_range(s, e); + pxmram -= __absent_pages_in_range(i, s, e); if ((long)pxmram < 0) pxmram = 0; } @@ -373,6 +373,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) for_each_node_mask(i, nodes_parsed) e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); + /* for out of order entries in SRAT */ + sort_node_map(); if (!nodes_cover_memory(nodes)) { bad_srat(); return -1; diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 044897be021..3855096c59b 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -41,10 +41,11 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, + .walk_stack = print_context_stack, }; struct frame_head { diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 0d13cd9fdcf..5bbb5a33f22 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -9,7 +9,7 @@ BEGIN { } /^GNU/ { - split($4, ver, "."); + split($3, ver, "."); if (ver[1] > od_ver || (ver[1] == od_ver && ver[2] >= od_sver)) { exit 1; diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 7a6850683c3..eaf11f52fc0 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -6,8 +6,6 @@ # Awk implementation sanity check function check_awk_implement() { - if (!match("abc", "[[:lower:]]+")) - return "Your awk doesn't support charactor-class." if (sprintf("%x", 0) != "0") return "Your awk has a printf-format problem." return "" @@ -44,12 +42,12 @@ BEGIN { delete gtable delete atable - opnd_expr = "^[[:alpha:]/]" + opnd_expr = "^[A-Za-z/]" ext_expr = "^\\(" sep_expr = "^\\|$" - group_expr = "^Grp[[:alnum:]]+" + group_expr = "^Grp[0-9A-Za-z]+" - imm_expr = "^[IJAO][[:lower:]]" + imm_expr = "^[IJAO][a-z]" imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" @@ -62,7 +60,7 @@ BEGIN { imm_flag["Ob"] = "INAT_MOFFSET" imm_flag["Ov"] = "INAT_MOFFSET" - modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" force64_expr = "\\([df]64\\)" rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1683ebda900..f4ea5a8c325 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3022,7 +3022,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) case WRITE_16: return ata_scsi_rw_xlat; - case 0x93 /*WRITE_SAME_16*/: + case WRITE_SAME_16: return ata_scsi_write_same_xlat; case SYNCHRONIZE_CACHE: diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index efa8773bef5..741065c9da6 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -2275,7 +2275,7 @@ void ata_sff_drain_fifo(struct ata_queued_cmd *qc) ap = qc->ap; /* Drain up to 64K of data before we give up this recovery method */ for (count = 0; (ap->ops->sff_check_status(ap) & ATA_DRQ) - && count < 32768; count++) + && count < 65536; count += 2) ioread16(ap->ioaddr.data_addr); /* Can become DEBUG later */ diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c index 9a09a1b11ca..dd26bc73bd9 100644 --- a/drivers/ata/pata_hpt3x2n.c +++ b/drivers/ata/pata_hpt3x2n.c @@ -8,7 +8,7 @@ * Copyright (C) 1999-2003 Andre Hedrick <andre@linux-ide.org> * Portions Copyright (C) 2001 Sun Microsystems, Inc. * Portions Copyright (C) 2003 Red Hat Inc - * Portions Copyright (C) 2005-2007 MontaVista Software, Inc. + * Portions Copyright (C) 2005-2009 MontaVista Software, Inc. * * * TODO @@ -25,7 +25,7 @@ #include <linux/libata.h> #define DRV_NAME "pata_hpt3x2n" -#define DRV_VERSION "0.3.7" +#define DRV_VERSION "0.3.8" enum { HPT_PCI_FAST = (1 << 31), @@ -264,7 +264,7 @@ static void hpt3x2n_bmdma_stop(struct ata_queued_cmd *qc) static void hpt3x2n_set_clock(struct ata_port *ap, int source) { - void __iomem *bmdma = ap->ioaddr.bmdma_addr; + void __iomem *bmdma = ap->ioaddr.bmdma_addr - ap->port_no * 8; /* Tristate the bus */ iowrite8(0x80, bmdma+0x73); @@ -274,9 +274,9 @@ static void hpt3x2n_set_clock(struct ata_port *ap, int source) iowrite8(source, bmdma+0x7B); iowrite8(0xC0, bmdma+0x79); - /* Reset state machines */ - iowrite8(0x37, bmdma+0x70); - iowrite8(0x37, bmdma+0x74); + /* Reset state machines, avoid enabling the disabled channels */ + iowrite8(ioread8(bmdma+0x70) | 0x32, bmdma+0x70); + iowrite8(ioread8(bmdma+0x74) | 0x32, bmdma+0x74); /* Complete reset */ iowrite8(0x00, bmdma+0x79); @@ -286,21 +286,10 @@ static void hpt3x2n_set_clock(struct ata_port *ap, int source) iowrite8(0x00, bmdma+0x77); } -/* Check if our partner interface is busy */ - -static int hpt3x2n_pair_idle(struct ata_port *ap) -{ - struct ata_host *host = ap->host; - struct ata_port *pair = host->ports[ap->port_no ^ 1]; - - if (pair->hsm_task_state == HSM_ST_IDLE) - return 1; - return 0; -} - static int hpt3x2n_use_dpll(struct ata_port *ap, int writing) { long flags = (long)ap->host->private_data; + /* See if we should use the DPLL */ if (writing) return USE_DPLL; /* Needed for write */ @@ -309,20 +298,35 @@ static int hpt3x2n_use_dpll(struct ata_port *ap, int writing) return 0; } +static int hpt3x2n_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_port *alt = ap->host->ports[ap->port_no ^ 1]; + int rc, flags = (long)ap->host->private_data; + int dpll = hpt3x2n_use_dpll(ap, qc->tf.flags & ATA_TFLAG_WRITE); + + /* First apply the usual rules */ + rc = ata_std_qc_defer(qc); + if (rc != 0) + return rc; + + if ((flags & USE_DPLL) != dpll && alt->qc_active) + return ATA_DEFER_PORT; + return 0; +} + static unsigned int hpt3x2n_qc_issue(struct ata_queued_cmd *qc) { - struct ata_taskfile *tf = &qc->tf; struct ata_port *ap = qc->ap; int flags = (long)ap->host->private_data; + int dpll = hpt3x2n_use_dpll(ap, qc->tf.flags & ATA_TFLAG_WRITE); - if (hpt3x2n_pair_idle(ap)) { - int dpll = hpt3x2n_use_dpll(ap, (tf->flags & ATA_TFLAG_WRITE)); - if ((flags & USE_DPLL) != dpll) { - if (dpll == 1) - hpt3x2n_set_clock(ap, 0x21); - else - hpt3x2n_set_clock(ap, 0x23); - } + if ((flags & USE_DPLL) != dpll) { + flags &= ~USE_DPLL; + flags |= dpll; + ap->host->private_data = (void *)(long)flags; + + hpt3x2n_set_clock(ap, dpll ? 0x21 : 0x23); } return ata_sff_qc_issue(qc); } @@ -339,6 +343,8 @@ static struct ata_port_operations hpt3x2n_port_ops = { .inherits = &ata_bmdma_port_ops, .bmdma_stop = hpt3x2n_bmdma_stop, + + .qc_defer = hpt3x2n_qc_defer, .qc_issue = hpt3x2n_qc_issue, .cable_detect = hpt3x2n_cable_detect, @@ -454,7 +460,7 @@ static int hpt3x2n_init_one(struct pci_dev *dev, const struct pci_device_id *id) unsigned int f_low, f_high; int adjust; unsigned long iobase = pci_resource_start(dev, 4); - void *hpriv = NULL; + void *hpriv = (void *)USE_DPLL; int rc; rc = pcim_enable_device(dev); @@ -539,7 +545,7 @@ static int hpt3x2n_init_one(struct pci_dev *dev, const struct pci_device_id *id) /* Set our private data up. We only need a few flags so we use it directly */ if (pci_mhz > 60) { - hpriv = (void *)PCI66; + hpriv = (void *)(PCI66 | USE_DPLL); /* * On HPT371N, if ATA clock is 66 MHz we must set bit 2 in * the MISC. register to stretch the UltraDMA Tss timing. diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index d6f69561dc8..37ef416c124 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -853,7 +853,7 @@ static int __devinit octeon_cf_probe(struct platform_device *pdev) return -EINVAL; cs1 = devm_ioremap_nocache(&pdev->dev, res_cs1->start, - res_cs0->end - res_cs1->start + 1); + resource_size(res_cs1)); if (!cs1) return -ENOMEM; diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index a8a7be0d06f..df8ee325d3c 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -59,6 +59,7 @@ #include <linux/dmapool.h> #include <linux/dma-mapping.h> #include <linux/device.h> +#include <linux/clk.h> #include <linux/platform_device.h> #include <linux/ata_platform.h> #include <linux/mbus.h> @@ -538,6 +539,7 @@ struct mv_port_signal { struct mv_host_priv { u32 hp_flags; + unsigned int board_idx; u32 main_irq_mask; struct mv_port_signal signal[8]; const struct mv_hw_ops *ops; @@ -548,6 +550,10 @@ struct mv_host_priv { u32 irq_cause_offset; u32 irq_mask_offset; u32 unmask_all_irqs; + +#if defined(CONFIG_HAVE_CLK) + struct clk *clk; +#endif /* * These consistent DMA memory pools give us guaranteed * alignment for hardware-accessed data structures, @@ -2775,7 +2781,7 @@ static void mv_port_intr(struct ata_port *ap, u32 port_cause) struct mv_port_priv *pp; int edma_was_enabled; - if (!ap || (ap->flags & ATA_FLAG_DISABLED)) { + if (ap->flags & ATA_FLAG_DISABLED) { mv_unexpected_intr(ap, 0); return; } @@ -3393,7 +3399,7 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv, ZERO(0x024); /* respq outp */ ZERO(0x020); /* respq inp */ ZERO(0x02c); /* test control */ - writel(0xbc, port_mmio + EDMA_IORDY_TMOUT); + writel(0x800, port_mmio + EDMA_IORDY_TMOUT); } #undef ZERO @@ -3854,7 +3860,6 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) /** * mv_init_host - Perform some early initialization of the host. * @host: ATA host to initialize - * @board_idx: controller index * * If possible, do an early global reset of the host. Then do * our port init and clear/unmask all/relevant host interrupts. @@ -3862,13 +3867,13 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) * LOCKING: * Inherited from caller. */ -static int mv_init_host(struct ata_host *host, unsigned int board_idx) +static int mv_init_host(struct ata_host *host) { int rc = 0, n_hc, port, hc; struct mv_host_priv *hpriv = host->private_data; void __iomem *mmio = hpriv->base; - rc = mv_chip_id(host, board_idx); + rc = mv_chip_id(host, hpriv->board_idx); if (rc) goto done; @@ -3905,14 +3910,6 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx) void __iomem *port_mmio = mv_port_base(mmio, port); mv_port_init(&ap->ioaddr, port_mmio); - -#ifdef CONFIG_PCI - if (!IS_SOC(hpriv)) { - unsigned int offset = port_mmio - mmio; - ata_port_pbar_desc(ap, MV_PRIMARY_BAR, -1, "mmio"); - ata_port_pbar_desc(ap, MV_PRIMARY_BAR, offset, "port"); - } -#endif } for (hc = 0; hc < n_hc; hc++) { @@ -4035,12 +4032,21 @@ static int mv_platform_probe(struct platform_device *pdev) return -ENOMEM; host->private_data = hpriv; hpriv->n_ports = n_ports; + hpriv->board_idx = chip_soc; host->iomap = NULL; hpriv->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); hpriv->base -= SATAHC0_REG_BASE; +#if defined(CONFIG_HAVE_CLK) + hpriv->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(hpriv->clk)) + dev_notice(&pdev->dev, "cannot get clkdev\n"); + else + clk_enable(hpriv->clk); +#endif + /* * (Re-)program MBUS remapping windows if we are asked to. */ @@ -4049,12 +4055,12 @@ static int mv_platform_probe(struct platform_device *pdev) rc = mv_create_dma_pools(hpriv, &pdev->dev); if (rc) - return rc; + goto err; /* initialize adapter */ - rc = mv_init_host(host, chip_soc); + rc = mv_init_host(host); if (rc) - return rc; + goto err; dev_printk(KERN_INFO, &pdev->dev, "slots %u ports %d\n", (unsigned)MV_MAX_Q_DEPTH, @@ -4062,6 +4068,15 @@ static int mv_platform_probe(struct platform_device *pdev) return ata_host_activate(host, platform_get_irq(pdev, 0), mv_interrupt, IRQF_SHARED, &mv6_sht); +err: +#if defined(CONFIG_HAVE_CLK) + if (!IS_ERR(hpriv->clk)) { + clk_disable(hpriv->clk); + clk_put(hpriv->clk); + } +#endif + + return rc; } /* @@ -4076,14 +4091,66 @@ static int __devexit mv_platform_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ata_host *host = dev_get_drvdata(dev); - +#if defined(CONFIG_HAVE_CLK) + struct mv_host_priv *hpriv = host->private_data; +#endif ata_host_detach(host); + +#if defined(CONFIG_HAVE_CLK) + if (!IS_ERR(hpriv->clk)) { + clk_disable(hpriv->clk); + clk_put(hpriv->clk); + } +#endif return 0; } +#ifdef CONFIG_PM +static int mv_platform_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + if (host) + return ata_host_suspend(host, state); + else + return 0; +} + +static int mv_platform_resume(struct platform_device *pdev) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + int ret; + + if (host) { + struct mv_host_priv *hpriv = host->private_data; + const struct mv_sata_platform_data *mv_platform_data = \ + pdev->dev.platform_data; + /* + * (Re-)program MBUS remapping windows if we are asked to. + */ + if (mv_platform_data->dram != NULL) + mv_conf_mbus_windows(hpriv, mv_platform_data->dram); + + /* initialize adapter */ + ret = mv_init_host(host); + if (ret) { + printk(KERN_ERR DRV_NAME ": Error during HW init\n"); + return ret; + } + ata_host_resume(host); + } + + return 0; +} +#else +#define mv_platform_suspend NULL +#define mv_platform_resume NULL +#endif + static struct platform_driver mv_platform_driver = { .probe = mv_platform_probe, .remove = __devexit_p(mv_platform_remove), + .suspend = mv_platform_suspend, + .resume = mv_platform_resume, .driver = { .name = DRV_NAME, .owner = THIS_MODULE, @@ -4094,6 +4161,9 @@ static struct platform_driver mv_platform_driver = { #ifdef CONFIG_PCI static int mv_pci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); +#ifdef CONFIG_PM +static int mv_pci_device_resume(struct pci_dev *pdev); +#endif static struct pci_driver mv_pci_driver = { @@ -4101,6 +4171,11 @@ static struct pci_driver mv_pci_driver = { .id_table = mv_pci_tbl, .probe = mv_pci_init_one, .remove = ata_pci_remove_one, +#ifdef CONFIG_PM + .suspend = ata_pci_device_suspend, + .resume = mv_pci_device_resume, +#endif + }; /* move to PCI layer or libata core? */ @@ -4194,7 +4269,7 @@ static int mv_pci_init_one(struct pci_dev *pdev, const struct ata_port_info *ppi[] = { &mv_port_info[board_idx], NULL }; struct ata_host *host; struct mv_host_priv *hpriv; - int n_ports, rc; + int n_ports, port, rc; if (!printed_version++) dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n"); @@ -4208,6 +4283,7 @@ static int mv_pci_init_one(struct pci_dev *pdev, return -ENOMEM; host->private_data = hpriv; hpriv->n_ports = n_ports; + hpriv->board_idx = board_idx; /* acquire resources */ rc = pcim_enable_device(pdev); @@ -4230,8 +4306,17 @@ static int mv_pci_init_one(struct pci_dev *pdev, if (rc) return rc; + for (port = 0; port < host->n_ports; port++) { + struct ata_port *ap = host->ports[port]; + void __iomem *port_mmio = mv_port_base(hpriv->base, port); + unsigned int offset = port_mmio - hpriv->base; + + ata_port_pbar_desc(ap, MV_PRIMARY_BAR, -1, "mmio"); + ata_port_pbar_desc(ap, MV_PRIMARY_BAR, offset, "port"); + } + /* initialize adapter */ - rc = mv_init_host(host, board_idx); + rc = mv_init_host(host); if (rc) return rc; @@ -4247,6 +4332,27 @@ static int mv_pci_init_one(struct pci_dev *pdev, return ata_host_activate(host, pdev->irq, mv_interrupt, IRQF_SHARED, IS_GEN_I(hpriv) ? &mv5_sht : &mv6_sht); } + +#ifdef CONFIG_PM +static int mv_pci_device_resume(struct pci_dev *pdev) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + int rc; + + rc = ata_pci_device_do_resume(pdev); + if (rc) + return rc; + + /* initialize adapter */ + rc = mv_init_host(host); + if (rc) + return rc; + + ata_host_resume(host); + + return 0; +} +#endif #endif static int mv_platform_probe(struct platform_device *pdev); diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index fd1231738ef..148b1dd2407 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -218,7 +218,7 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) spin_unlock_irqrestore(&aliastree.lock, flags); newlcu = _allocate_lcu(uid); if (IS_ERR(newlcu)) - return PTR_ERR(lcu); + return PTR_ERR(newlcu); spin_lock_irqsave(&aliastree.lock, flags); lcu = _find_lcu(server, uid); if (!lcu) { diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index f64d0db881b..6e14863f5c7 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -8,7 +8,7 @@ * */ -#define KMSG_COMPONENT "dasd-diag" +#define KMSG_COMPONENT "dasd" #include <linux/stddef.h> #include <linux/kernel.h> @@ -146,16 +146,16 @@ dasd_diag_erp(struct dasd_device *device) rc = mdsk_init_io(device, device->block->bp_block, 0, NULL); if (rc == 4) { if (!(device->features & DASD_FEATURE_READONLY)) { - dev_warn(&device->cdev->dev, - "The access mode of a DIAG device changed" - " to read-only"); + pr_warning("%s: The access mode of a DIAG device " + "changed to read-only\n", + dev_name(&device->cdev->dev)); device->features |= DASD_FEATURE_READONLY; } rc = 0; } if (rc) - dev_warn(&device->cdev->dev, "DIAG ERP failed with " - "rc=%d\n", rc); + pr_warning("%s: DIAG ERP failed with " + "rc=%d\n", dev_name(&device->cdev->dev), rc); } /* Start a given request at the device. Return zero on success, non-zero @@ -371,8 +371,9 @@ dasd_diag_check_device(struct dasd_device *device) private->pt_block = 2; break; default: - dev_warn(&device->cdev->dev, "Device type %d is not supported " - "in DIAG mode\n", private->rdc_data.vdev_class); + pr_warning("%s: Device type %d is not supported " + "in DIAG mode\n", dev_name(&device->cdev->dev), + private->rdc_data.vdev_class); rc = -EOPNOTSUPP; goto out; } @@ -413,8 +414,8 @@ dasd_diag_check_device(struct dasd_device *device) private->iob.flaga = DASD_DIAG_FLAGA_DEFAULT; rc = dia250(&private->iob, RW_BIO); if (rc == 3) { - dev_warn(&device->cdev->dev, - "A 64-bit DIAG call failed\n"); + pr_warning("%s: A 64-bit DIAG call failed\n", + dev_name(&device->cdev->dev)); rc = -EOPNOTSUPP; goto out_label; } @@ -423,8 +424,9 @@ dasd_diag_check_device(struct dasd_device *device) break; } if (bsize > PAGE_SIZE) { - dev_warn(&device->cdev->dev, "Accessing the DASD failed because" - " of an incorrect format (rc=%d)\n", rc); + pr_warning("%s: Accessing the DASD failed because of an " + "incorrect format (rc=%d)\n", + dev_name(&device->cdev->dev), rc); rc = -EIO; goto out_label; } @@ -442,18 +444,18 @@ dasd_diag_check_device(struct dasd_device *device) block->s2b_shift++; rc = mdsk_init_io(device, block->bp_block, 0, NULL); if (rc && (rc != 4)) { - dev_warn(&device->cdev->dev, "DIAG initialization " - "failed with rc=%d\n", rc); + pr_warning("%s: DIAG initialization failed with rc=%d\n", + dev_name(&device->cdev->dev), rc); rc = -EIO; } else { if (rc == 4) device->features |= DASD_FEATURE_READONLY; - dev_info(&device->cdev->dev, - "New DASD with %ld byte/block, total size %ld KB%s\n", - (unsigned long) block->bp_block, - (unsigned long) (block->blocks << - block->s2b_shift) >> 1, - (rc == 4) ? ", read-only device" : ""); + pr_info("%s: New DASD with %ld byte/block, total size %ld " + "KB%s\n", dev_name(&device->cdev->dev), + (unsigned long) block->bp_block, + (unsigned long) (block->blocks << + block->s2b_shift) >> 1, + (rc == 4) ? ", read-only device" : ""); rc = 0; } out_label: diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 28e4649fa9e..247b2b93472 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -467,7 +467,7 @@ fs3270_open(struct inode *inode, struct file *filp) if (IS_ERR(ib)) { raw3270_put_view(&fp->view); raw3270_del_view(&fp->view); - rc = PTR_ERR(fp); + rc = PTR_ERR(ib); goto out; } fp->rdbuf = ib; diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 3657fe103c2..cb70fa1cf53 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -9,6 +9,7 @@ */ #define KMSG_COMPONENT "tape_34xx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/init.h> diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 0c72aadb839..9821c588661 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -9,6 +9,7 @@ */ #define KMSG_COMPONENT "tape_3590" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/init.h> @@ -136,7 +137,7 @@ static void int_to_ext_kekl(struct tape3592_kekl *in, out->type_on_tape = TAPE390_KEKL_TYPE_LABEL; memcpy(out->label, in->label, sizeof(in->label)); EBCASC(out->label, sizeof(in->label)); - strstrip(out->label); + strim(out->label); } static void int_to_ext_kekl_pair(struct tape3592_kekl_pair *in, diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 4799cc2f73c..96816149368 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -11,6 +11,7 @@ */ #define KMSG_COMPONENT "tape" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/fs.h> #include <linux/module.h> diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index 23d773a0d11..2125ec7d95f 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -10,6 +10,9 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#define KMSG_COMPONENT "tape" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/proc_fs.h> diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index ddc914ccea8..b2864e3edb6 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -7,6 +7,10 @@ * Author: Stefan Bader <shbader@de.ibm.com> * Based on simple class device code by Greg K-H */ + +#define KMSG_COMPONENT "tape" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include "tape_class.h" MODULE_AUTHOR("Stefan Bader <shbader@de.ibm.com>"); diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index f5d6802dc5d..81b094e480e 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -12,6 +12,8 @@ */ #define KMSG_COMPONENT "tape" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/init.h> // for kernel parameters #include <linux/kmod.h> // for requesting modules diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c index ebd820ccfb2..0ceb37984f7 100644 --- a/drivers/s390/char/tape_proc.c +++ b/drivers/s390/char/tape_proc.c @@ -11,6 +11,9 @@ * PROCFS Functions */ +#define KMSG_COMPONENT "tape" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/seq_file.h> diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index 750354ad16e..03f07e5dd6e 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -11,6 +11,9 @@ * Stefan Bader <shbader@de.ibm.com> */ +#define KMSG_COMPONENT "tape" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/bio.h> diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c index 9509e386093..7a28a3029a3 100644 --- a/drivers/s390/cio/ccwreq.c +++ b/drivers/s390/cio/ccwreq.c @@ -49,7 +49,6 @@ static u16 ccwreq_next_path(struct ccw_device *cdev) */ static void ccwreq_stop(struct ccw_device *cdev, int rc) { - struct subchannel *sch = to_subchannel(cdev->dev.parent); struct ccw_request *req = &cdev->private->req; if (req->done) @@ -57,7 +56,6 @@ static void ccwreq_stop(struct ccw_device *cdev, int rc) req->done = 1; ccw_device_set_timeout(cdev, 0); memset(&cdev->private->irb, 0, sizeof(struct irb)); - sch->lpm = sch->schib.pmcw.pam; if (rc && rc != -ENODEV && req->drc) rc = req->drc; req->callback(cdev, req->data, rc); @@ -80,7 +78,6 @@ static void ccwreq_do(struct ccw_device *cdev) continue; } /* Perform start function. */ - sch->lpm = 0xff; memset(&cdev->private->irb, 0, sizeof(struct irb)); rc = cio_start(sch, cp, (u8) req->mask); if (rc == 0) { diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 73901c9e260..a6c7d5426fb 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1519,6 +1519,7 @@ static int ccw_device_console_enable(struct ccw_device *cdev, sch->driver = &io_subchannel_driver; /* Initialize the ccw_device structure. */ cdev->dev.parent= &sch->dev; + sch_set_cdev(sch, cdev); io_subchannel_recog(cdev, sch); /* Now wait for the async. recognition to come to an end. */ spin_lock_irq(cdev->ccwlock); diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index aad188e43b4..6facb5499a6 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -142,7 +142,7 @@ static void spid_do(struct ccw_device *cdev) u8 fn; /* Use next available path that is not already in correct state. */ - req->lpm = lpm_adjust(req->lpm, sch->schib.pmcw.pam & ~sch->vpm); + req->lpm = lpm_adjust(req->lpm, cdev->private->pgid_todo_mask); if (!req->lpm) goto out_nopath; /* Channel program setup. */ @@ -254,15 +254,15 @@ static void pgid_analyze(struct ccw_device *cdev, struct pgid **p, *p = first; } -static u8 pgid_to_vpm(struct ccw_device *cdev) +static u8 pgid_to_donepm(struct ccw_device *cdev) { struct subchannel *sch = to_subchannel(cdev->dev.parent); struct pgid *pgid; int i; int lpm; - u8 vpm = 0; + u8 donepm = 0; - /* Set VPM bits for paths which are already in the target state. */ + /* Set bits for paths which are already in the target state. */ for (i = 0; i < 8; i++) { lpm = 0x80 >> i; if ((cdev->private->pgid_valid_mask & lpm) == 0) @@ -282,10 +282,10 @@ static u8 pgid_to_vpm(struct ccw_device *cdev) if (pgid->inf.ps.state3 != SNID_STATE3_SINGLE_PATH) continue; } - vpm |= lpm; + donepm |= lpm; } - return vpm; + return donepm; } static void pgid_fill(struct ccw_device *cdev, struct pgid *pgid) @@ -307,6 +307,7 @@ static void snid_done(struct ccw_device *cdev, int rc) int mismatch = 0; int reserved = 0; int reset = 0; + u8 donepm; if (rc) goto out; @@ -316,18 +317,20 @@ static void snid_done(struct ccw_device *cdev, int rc) else if (mismatch) rc = -EOPNOTSUPP; else { - sch->vpm = pgid_to_vpm(cdev); + donepm = pgid_to_donepm(cdev); + sch->vpm = donepm & sch->opm; + cdev->private->pgid_todo_mask &= ~donepm; pgid_fill(cdev, pgid); } out: CIO_MSG_EVENT(2, "snid: device 0.%x.%04x: rc=%d pvm=%02x vpm=%02x " - "mism=%d rsvd=%d reset=%d\n", id->ssid, id->devno, rc, - cdev->private->pgid_valid_mask, sch->vpm, mismatch, - reserved, reset); + "todo=%02x mism=%d rsvd=%d reset=%d\n", id->ssid, + id->devno, rc, cdev->private->pgid_valid_mask, sch->vpm, + cdev->private->pgid_todo_mask, mismatch, reserved, reset); switch (rc) { case 0: /* Anything left to do? */ - if (sch->vpm == sch->schib.pmcw.pam) { + if (cdev->private->pgid_todo_mask == 0) { verify_done(cdev, sch->vpm == 0 ? -EACCES : 0); return; } @@ -411,6 +414,7 @@ static void verify_start(struct ccw_device *cdev) struct ccw_dev_id *devid = &cdev->private->dev_id; sch->vpm = 0; + sch->lpm = sch->schib.pmcw.pam; /* Initialize request data. */ memset(req, 0, sizeof(*req)); req->timeout = PGID_TIMEOUT; @@ -442,11 +446,14 @@ static void verify_start(struct ccw_device *cdev) */ void ccw_device_verify_start(struct ccw_device *cdev) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); + CIO_TRACE_EVENT(4, "vrfy"); CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id)); /* Initialize PGID data. */ memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid)); cdev->private->pgid_valid_mask = 0; + cdev->private->pgid_todo_mask = sch->schib.pmcw.pam; /* * Initialize pathgroup and multipath state with target values. * They may change in the course of path verification. diff --git a/drivers/s390/cio/fcx.c b/drivers/s390/cio/fcx.c index 61677dfbdc9..ca5e9bb9d45 100644 --- a/drivers/s390/cio/fcx.c +++ b/drivers/s390/cio/fcx.c @@ -163,7 +163,7 @@ void tcw_finalize(struct tcw *tcw, int num_tidaws) /* Add tcat to tccb. */ tccb = tcw_get_tccb(tcw); tcat = (struct tccb_tcat *) &tccb->tca[tca_size(tccb)]; - memset(tcat, 0, sizeof(tcat)); + memset(tcat, 0, sizeof(*tcat)); /* Calculate tcw input/output count and tcat transport count. */ count = calc_dcw_count(tccb); if (tcw->w && (tcw->flags & TCW_FLAGS_OUTPUT_TIDA)) @@ -269,7 +269,7 @@ EXPORT_SYMBOL(tccb_init); */ void tsb_init(struct tsb *tsb) { - memset(tsb, 0, sizeof(tsb)); + memset(tsb, 0, sizeof(*tsb)); } EXPORT_SYMBOL(tsb_init); diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index d72ae4c93af..b9ce712a7f2 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -150,6 +150,7 @@ struct ccw_device_private { struct ccw_request req; /* internal I/O request */ int iretry; u8 pgid_valid_mask; /* mask of valid PGIDs */ + u8 pgid_todo_mask; /* mask of PGIDs to be adjusted */ struct { unsigned int fast:1; /* post with "channel end" */ unsigned int repall:1; /* report every interrupt status */ diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4be6e84b959..b2275c5000e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -486,7 +486,8 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) case SLSB_P_INPUT_PRIMED: inbound_primed(q, count); q->first_to_check = add_buf(q->first_to_check, count); - atomic_sub(count, &q->nr_buf_used); + if (atomic_sub(count, &q->nr_buf_used) == 0) + qdio_perf_stat_inc(&perf_stats.inbound_queue_full); break; case SLSB_P_INPUT_ERROR: announce_buffer_error(q, count); diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c index 968e3c7c263..54f7c325a3e 100644 --- a/drivers/s390/cio/qdio_perf.c +++ b/drivers/s390/cio/qdio_perf.c @@ -64,6 +64,8 @@ static int qdio_perf_proc_show(struct seq_file *m, void *v) (long)atomic_long_read(&perf_stats.fast_requeue)); seq_printf(m, "Number of outbound target full condition\t: %li\n", (long)atomic_long_read(&perf_stats.outbound_target_full)); + seq_printf(m, "Number of inbound queue full condition\t\t: %li\n", + (long)atomic_long_read(&perf_stats.inbound_queue_full)); seq_printf(m, "Number of outbound tasklet mod_timer calls\t: %li\n", (long)atomic_long_read(&perf_stats.debug_tl_out_timer)); seq_printf(m, "Number of stop polling calls\t\t\t: %li\n", diff --git a/drivers/s390/cio/qdio_perf.h b/drivers/s390/cio/qdio_perf.h index ff4504ce1e3..12454231dc8 100644 --- a/drivers/s390/cio/qdio_perf.h +++ b/drivers/s390/cio/qdio_perf.h @@ -36,6 +36,7 @@ struct qdio_perf_stats { atomic_long_t outbound_handler; atomic_long_t fast_requeue; atomic_long_t outbound_target_full; + atomic_long_t inbound_queue_full; /* for debugging */ atomic_long_t debug_tl_out_timer; diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 18d54fc21ce..8c2dea5fa2b 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -48,7 +48,6 @@ static void set_impl_params(struct qdio_irq *irq_ptr, if (!irq_ptr) return; - WARN_ON((unsigned long)&irq_ptr->qib & 0xff); irq_ptr->qib.pfmt = qib_param_field_format; if (qib_param_field) memcpy(irq_ptr->qib.parm, qib_param_field, @@ -82,14 +81,12 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); if (!q) return -ENOMEM; - WARN_ON((unsigned long)q & 0xff); q->slib = (struct slib *) __get_free_page(GFP_KERNEL); if (!q->slib) { kmem_cache_free(qdio_q_cache, q); return -ENOMEM; } - WARN_ON((unsigned long)q->slib & 0x7ff); irq_ptr_qs[i] = q; } return 0; @@ -131,7 +128,7 @@ static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, /* fill in sbal */ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) { q->sbal[j] = *sbals_array++; - WARN_ON((unsigned long)q->sbal[j] & 0xff); + BUG_ON((unsigned long)q->sbal[j] & 0xff); } /* fill in slib */ @@ -147,11 +144,6 @@ static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, /* fill in sl */ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) q->sl->element[j].sbal = (unsigned long)q->sbal[j]; - - DBF_EVENT("sl-slsb-sbal"); - DBF_HEX(q->sl, sizeof(void *)); - DBF_HEX(&q->slsb, sizeof(void *)); - DBF_HEX(q->sbal, sizeof(void *)); } static void setup_queues(struct qdio_irq *irq_ptr, diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index da84fd03850..088f32f29a6 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -368,7 +368,7 @@ config ALIM7101_WDT config GEODE_WDT tristate "AMD Geode CS5535/CS5536 Watchdog" - depends on MGEODE_LX + depends on CS5535_MFGPT help This driver enables a watchdog capability built into the CS5535/CS5536 companion chips for the AMD Geode GX and LX diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 9acf0015a1e..38252ff828c 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -1,6 +1,7 @@ -/* Watchdog timer for the Geode GX/LX with the CS5535/CS5536 companion chip +/* Watchdog timer for machines with the CS5535/CS5536 companion chip * * Copyright (C) 2006-2007, Advanced Micro Devices, Inc. + * Copyright (C) 2009 Andres Salomon <dilinger@collabora.co.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,7 +20,7 @@ #include <linux/reboot.h> #include <linux/uaccess.h> -#include <asm/geode.h> +#include <linux/cs5535.h> #define GEODEWDT_HZ 500 #define GEODEWDT_SCALE 6 @@ -46,25 +47,25 @@ MODULE_PARM_DESC(nowayout, static struct platform_device *geodewdt_platform_device; static unsigned long wdt_flags; -static int wdt_timer; +static struct cs5535_mfgpt_timer *wdt_timer; static int safe_close; static void geodewdt_ping(void) { /* Stop the counter */ - geode_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, 0); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, 0); /* Reset the counter */ - geode_mfgpt_write(wdt_timer, MFGPT_REG_COUNTER, 0); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_COUNTER, 0); /* Enable the counter */ - geode_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, MFGPT_SETUP_CNTEN); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, MFGPT_SETUP_CNTEN); } static void geodewdt_disable(void) { - geode_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, 0); - geode_mfgpt_write(wdt_timer, MFGPT_REG_COUNTER, 0); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, 0); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_COUNTER, 0); } static int geodewdt_set_heartbeat(int val) @@ -72,10 +73,10 @@ static int geodewdt_set_heartbeat(int val) if (val < 1 || val > GEODEWDT_MAX_SECONDS) return -EINVAL; - geode_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, 0); - geode_mfgpt_write(wdt_timer, MFGPT_REG_CMP2, val * GEODEWDT_HZ); - geode_mfgpt_write(wdt_timer, MFGPT_REG_COUNTER, 0); - geode_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, MFGPT_SETUP_CNTEN); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, 0); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_CMP2, val * GEODEWDT_HZ); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_COUNTER, 0); + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, MFGPT_SETUP_CNTEN); timeout = val; return 0; @@ -215,28 +216,25 @@ static struct miscdevice geodewdt_miscdev = { static int __devinit geodewdt_probe(struct platform_device *dev) { - int ret, timer; - - timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); + int ret; - if (timer == -1) { + wdt_timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); + if (!wdt_timer) { printk(KERN_ERR "geodewdt: No timers were available\n"); return -ENODEV; } - wdt_timer = timer; - /* Set up the timer */ - geode_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_SETUP, GEODEWDT_SCALE | (3 << 8)); /* Set up comparator 2 to reset when the event fires */ - geode_mfgpt_toggle_event(wdt_timer, MFGPT_CMP2, MFGPT_EVENT_RESET, 1); + cs5535_mfgpt_toggle_event(wdt_timer, MFGPT_CMP2, MFGPT_EVENT_RESET, 1); /* Set up the initial timeout */ - geode_mfgpt_write(wdt_timer, MFGPT_REG_CMP2, + cs5535_mfgpt_write(wdt_timer, MFGPT_REG_CMP2, timeout * GEODEWDT_HZ); ret = misc_register(&geodewdt_miscdev); diff --git a/fs/proc/array.c b/fs/proc/array.c index 4badde179b1..f560325c444 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -134,13 +134,16 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) * simple bit tests. */ static const char *task_state_array[] = { - "R (running)", /* 0 */ - "S (sleeping)", /* 1 */ - "D (disk sleep)", /* 2 */ - "T (stopped)", /* 4 */ - "T (tracing stop)", /* 8 */ - "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "R (running)", /* 0 */ + "S (sleeping)", /* 1 */ + "D (disk sleep)", /* 2 */ + "T (stopped)", /* 4 */ + "t (tracing stop)", /* 8 */ + "Z (zombie)", /* 16 */ + "X (dead)", /* 32 */ + "x (dead)", /* 64 */ + "K (wakekill)", /* 128 */ + "W (waking)", /* 256 */ }; static inline const char *get_task_state(struct task_struct *tsk) @@ -148,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk) unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; const char **p = &task_state_array[0]; + BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); + while (state) { p++; state >>= 1; diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 12ff8c3f1d0..5032b9a31ae 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -25,7 +25,7 @@ static void *malloc(int size) void *p; if (size < 0) - error("Malloc error"); + return NULL; if (!malloc_ptr) malloc_ptr = free_mem_ptr; @@ -35,7 +35,7 @@ static void *malloc(int size) malloc_ptr += size; if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) - error("Out of memory"); + return NULL; malloc_count++; return p; diff --git a/include/linux/elf.h b/include/linux/elf.h index 90a4ed0ea0e..0cc4d55151b 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -361,7 +361,7 @@ typedef struct elf64_shdr { #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ -#define NT_PRXSTATUS 0x300 /* s390 upper register halves */ +#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ /* Note header in a PT_NOTE section */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 849b4a61bd8..2265f28eb47 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1037,6 +1037,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); +void sort_node_map(void); +unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, + unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h deleted file mode 100644 index e3fb2560670..00000000000 --- a/include/linux/perf_counter.h +++ /dev/null @@ -1,444 +0,0 @@ -/* - * NOTE: this file will be removed in a future kernel release, it is - * provided as a courtesy copy of user-space code that relies on the - * old (pre-rename) symbols and constants. - * - * Performance events: - * - * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra - * - * Data type definitions, declarations, prototypes. - * - * Started by: Thomas Gleixner and Ingo Molnar - * - * For licencing details see kernel-base/COPYING - */ -#ifndef _LINUX_PERF_COUNTER_H -#define _LINUX_PERF_COUNTER_H - -#include <linux/types.h> -#include <linux/ioctl.h> -#include <asm/byteorder.h> - -/* - * User-space ABI bits: - */ - -/* - * attr.type - */ -enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, - - PERF_TYPE_MAX, /* non-ABI */ -}; - -/* - * Generalized performance counter event types, used by the - * attr.event_id parameter of the sys_perf_counter_open() - * syscall: - */ -enum perf_hw_id { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_HW_CPU_CYCLES = 0, - PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, - PERF_COUNT_HW_CACHE_MISSES = 3, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, - PERF_COUNT_HW_BUS_CYCLES = 6, - - PERF_COUNT_HW_MAX, /* non-ABI */ -}; - -/* - * Generalized hardware cache counters: - * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x - * { read, write, prefetch } x - * { accesses, misses } - */ -enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, - PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ -}; - -/* - * Special "software" counters provided by the kernel, even if the hardware - * does not support performance counters. These counters measure various - * physical and sw events of the kernel (and allow the profiling of them as - * well): - */ -enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, - PERF_COUNT_SW_EMULATION_FAULTS = 8, - - PERF_COUNT_SW_MAX, /* non-ABI */ -}; - -/* - * Bits that can be set in attr.sample_type to request information - * in the overflow packets. - */ -enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_READ = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, - PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_RAW = 1U << 10, - - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ -}; - -/* - * The format of the data returned by read() on a perf counter fd, - * as specified by attr.read_format: - * - * struct read_format { - * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 id; } && PERF_FORMAT_ID - * } && !PERF_FORMAT_GROUP - * - * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 value; - * { u64 id; } && PERF_FORMAT_ID - * } cntr[nr]; - * } && PERF_FORMAT_GROUP - * }; - */ -enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, - PERF_FORMAT_GROUP = 1U << 3, - - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ -}; - -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_attr { - - /* - * Major type: hardware/software/tracepoint/etc. - */ - __u32 type; - - /* - * Size of the attr structure, for fwd/bwd compat. - */ - __u32 size; - - /* - * Type specific configuration information. - */ - __u64 config; - - union { - __u64 sample_period; - __u64 sample_freq; - }; - - __u64 sample_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - comm : 1, /* include comm data */ - freq : 1, /* use freq, not period */ - inherit_stat : 1, /* per task counts */ - enable_on_exec : 1, /* next exec enables */ - task : 1, /* trace fork/exit */ - watermark : 1, /* wakeup_watermark */ - - __reserved_1 : 49; - - union { - __u32 wakeup_events; /* wakeup every n events */ - __u32 wakeup_watermark; /* bytes before wakeup */ - }; - __u32 __reserved_2; - - __u64 __reserved_3; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) -#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) -#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) -#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *) - -enum perf_counter_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, -}; - -/* - * Structure of the page that can be mapped via mmap - */ -struct perf_counter_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - - /* - * Bits needed to read the hw counters in user-space. - * - * u32 seq; - * s64 count; - * - * do { - * seq = pc->lock; - * - * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; - * - * barrier(); - * } while (pc->lock != seq); - * - * NOTE: for obvious reason this only works on self-monitoring - * processes. - */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - __u64 time_enabled; /* time counter active */ - __u64 time_running; /* time counter on cpu */ - - /* - * Hole for extension of the self monitor capabilities - */ - - __u64 __reserved[123]; /* align to 1k */ - - /* - * Control data for the mmap() data buffer. - * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_counter_wakeup(). - * - * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. - */ - __u64 data_head; /* head in the data section */ - __u64 data_tail; /* user-space written tail */ -}; - -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) - -struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; -}; - -enum perf_event_type { - - /* - * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: - * - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; - * }; - */ - PERF_EVENT_MMAP = 1, - - /* - * struct { - * struct perf_event_header header; - * u64 id; - * u64 lost; - * }; - */ - PERF_EVENT_LOST = 2, - - /* - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * char comm[]; - * }; - */ - PERF_EVENT_COMM = 3, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * u64 time; - * }; - */ - PERF_EVENT_EXIT = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; - * u64 stream_id; - * }; - */ - PERF_EVENT_THROTTLE = 5, - PERF_EVENT_UNTHROTTLE = 6, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * u64 time; - * }; - */ - PERF_EVENT_FORK = 7, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, tid; - * - * struct read_format values; - * }; - */ - PERF_EVENT_READ = 8, - - /* - * struct { - * struct perf_event_header header; - * - * { u64 ip; } && PERF_SAMPLE_IP - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU - * { u64 period; } && PERF_SAMPLE_PERIOD - * - * { struct read_format values; } && PERF_SAMPLE_READ - * - * { u64 nr, - * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN - * - * # - * # The RAW record below is opaque data wrt the ABI - * # - * # That is, the ABI doesn't make any promises wrt to - * # the stability of its content, it may vary depending - * # on event, hardware, kernel version and phase of - * # the moon. - * # - * # In other words, PERF_SAMPLE_RAW contents are not an ABI. - * # - * - * { u32 size; - * char data[size];}&& PERF_SAMPLE_RAW - * }; - */ - PERF_EVENT_SAMPLE = 9, - - PERF_EVENT_MAX, /* non-ABI */ -}; - -enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, - - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, - - PERF_CONTEXT_MAX = (__u64)-4095, -}; - -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) - -/* - * In case some app still references the old symbols: - */ - -#define __NR_perf_counter_open __NR_perf_event_open - -#define PR_TASK_PERF_COUNTERS_DISABLE PR_TASK_PERF_EVENTS_DISABLE -#define PR_TASK_PERF_COUNTERS_ENABLE PR_TASK_PERF_EVENTS_ENABLE - -#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c4ba9a78721..96cc307ed9f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -101,4 +101,9 @@ static inline void exit_rcu(void) { } +static inline int rcu_preempt_depth(void) +{ + return 0; +} + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c93eee5911b..8044b1b9433 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,6 +45,12 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) @@ -63,6 +69,11 @@ static inline void exit_rcu(void) { } +static inline int rcu_preempt_depth(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock_bh(void) diff --git a/include/linux/sched.h b/include/linux/sched.h index e89857812be..f2f842db03c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -192,6 +192,12 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 +#define TASK_STATE_MAX 512 + +#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" + +extern char ___assert_task_state[1 - 2*!!( + sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -1091,7 +1097,8 @@ struct sched_class { enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, const struct cpumask *newmask); @@ -1115,7 +1122,7 @@ struct sched_class { struct task_struct *task); #ifdef CONFIG_FAIR_GROUP_SCHED - void (*moved_group) (struct task_struct *p); + void (*moved_group) (struct task_struct *p, int on_rq); #endif }; @@ -2594,8 +2601,6 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ -#define TASK_STATE_TO_CHAR_STR "RSDTtZX" - #endif /* __KERNEL__ */ #endif diff --git a/init/initramfs.c b/init/initramfs.c index 4c00edc5968..b37d34beb90 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -413,7 +413,7 @@ static unsigned my_inptr; /* index of next byte to be processed in inbuf */ static char * __init unpack_to_rootfs(char *buf, unsigned len) { - int written; + int written, res; decompress_fn decompress; const char *compress_name; static __initdata char msg_buf[64]; @@ -445,10 +445,12 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len) } this_header = 0; decompress = decompress_method(buf, len, &compress_name); - if (decompress) - decompress(buf, len, NULL, flush_buffer, NULL, + if (decompress) { + res = decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); - else if (compress_name) { + if (res) + error("decompressor failed"); + } else if (compress_name) { if (!message) { snprintf(msg_buf, sizeof msg_buf, "compression method %s not configured", diff --git a/init/main.c b/init/main.c index c3db4a98b36..dac44a9356a 100644 --- a/init/main.c +++ b/init/main.c @@ -369,12 +369,6 @@ static void __init smp_init(void) { unsigned int cpu; - /* - * Set up the current CPU as possible to migrate to. - * The other ones will be done by cpu_up/cpu_down() - */ - set_cpu_active(smp_processor_id(), true); - /* FIXME: This should be done in userspace --RR */ for_each_present_cpu(cpu) { if (num_online_cpus() >= setup_max_cpus) @@ -486,6 +480,7 @@ static void __init boot_cpu_init(void) int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ set_cpu_online(cpu, true); + set_cpu_active(cpu, true); set_cpu_present(cpu, true); set_cpu_possible(cpu, true); } diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2451dc6f328..4b05bd9479d 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -277,7 +277,7 @@ static void untag_chunk(struct node *p) owner->root = NULL; } - for (i = j = 0; i < size; i++, j++) { + for (i = j = 0; j <= size; i++, j++) { struct audit_tree *s; if (&chunk->owners[j] == p) { list_del_init(&p->list); @@ -290,7 +290,7 @@ static void untag_chunk(struct node *p) if (!s) /* result of earlier fallback */ continue; get_tree(s); - list_replace_init(&chunk->owners[i].list, &new->owners[j].list); + list_replace_init(&chunk->owners[j].list, &new->owners[i].list); } list_replace_rcu(&chunk->hash, &new->hash); @@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) for (n = 0; n < old->count; n++) { if (old->owners[n].owner == tree) { spin_unlock(&hash_lock); - put_inotify_watch(watch); + put_inotify_watch(&old->watch); return 0; } } spin_unlock(&hash_lock); chunk = alloc_chunk(old->count + 1); - if (!chunk) + if (!chunk) { + put_inotify_watch(&old->watch); return -ENOMEM; + } mutex_lock(&inode->inotify_mutex); if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { @@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); inotify_evict_watch(&old->watch); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&old->watch); + put_inotify_watch(&old->watch); /* pair to inotify_find_watch */ + put_inotify_watch(&old->watch); /* and kill it */ return 0; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 291ac586f37..1c8ddd6ee94 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) return -ENOMEM; cpu_hotplug_begin(); + set_cpu_active(cpu, false); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { @@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu) goto out; } - set_cpu_active(cpu, false); - - /* - * Make sure the all cpus did the reschedule and are not - * using stale version of the cpu_active_mask. - * This is not strictly necessary becuase stop_machine() - * that we run down the line already provides the required - * synchronization. But it's really a side effect and we do not - * want to depend on the innards of the stop_machine here. - */ - synchronize_sched(); - err = _cpu_down(cpu, 0); out: @@ -382,19 +371,12 @@ int disable_nonboot_cpus(void) return error; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); - /* We take down all of the non-boot CPUs in one shot to avoid races + /* + * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); - for_each_online_cpu(cpu) { - if (cpu == first_cpu) - continue; - set_cpu_active(cpu, false); - } - - synchronize_sched(); - printk("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == first_cpu) diff --git a/kernel/kthread.c b/kernel/kthread.c index ab7ae57773e..fbb6222fe7e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), EXPORT_SYMBOL(kthread_create); /** + * kthread_bind - bind a just-created kthread to a cpu. + * @p: thread created by kthread_create(). + * @cpu: cpu (might not be online, must be possible) for @k to run on. + * + * Description: This function is equivalent to set_cpus_allowed(), + * except that @cpu doesn't need to be online, and the thread must be + * stopped (i.e., just returned from kthread_create()). + */ +void kthread_bind(struct task_struct *p, unsigned int cpu) +{ + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { + WARN_ON(1); + return; + } + + p->cpus_allowed = cpumask_of_cpu(cpu); + p->rt.nr_cpus_allowed = 1; + p->flags |= PF_THREAD_BOUND; +} +EXPORT_SYMBOL(kthread_bind); + +/** * kthread_stop - stop a thread created by kthread_create(). * @k: thread created by kthread_create(). * diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 97d1a3dd7a5..e0eb4a2fe18 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1381,6 +1381,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (event->state != PERF_EVENT_STATE_ACTIVE) continue; + if (event->cpu != -1 && event->cpu != smp_processor_id()) + continue; + hwc = &event->hw; interrupts = hwc->interrupts; @@ -3265,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event, static int perf_event_task_match(struct perf_event *event) { + if (event->cpu != -1 && event->cpu != smp_processor_id()) + return 0; + if (event->attr.comm || event->attr.mmap || event->attr.task) return 1; @@ -3290,12 +3296,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); - put_cpu_var(perf_cpu_context); - if (!ctx) ctx = rcu_dereference(task_event->task->perf_event_ctxp); if (ctx) perf_event_task_ctx(ctx, task_event); + put_cpu_var(perf_cpu_context); rcu_read_unlock(); } @@ -3372,6 +3377,9 @@ static void perf_event_comm_output(struct perf_event *event, static int perf_event_comm_match(struct perf_event *event) { + if (event->cpu != -1 && event->cpu != smp_processor_id()) + return 0; + if (event->attr.comm) return 1; @@ -3408,15 +3416,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); - put_cpu_var(perf_cpu_context); - - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_comm_ctx(ctx, comm_event); + put_cpu_var(perf_cpu_context); rcu_read_unlock(); } @@ -3491,6 +3494,9 @@ static void perf_event_mmap_output(struct perf_event *event, static int perf_event_mmap_match(struct perf_event *event, struct perf_mmap_event *mmap_event) { + if (event->cpu != -1 && event->cpu != smp_processor_id()) + return 0; + if (event->attr.mmap) return 1; @@ -3564,15 +3570,10 @@ got_name: rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); - put_cpu_var(perf_cpu_context); - - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_mmap_ctx(ctx, mmap_event); + put_cpu_var(perf_cpu_context); rcu_read_unlock(); kfree(buf); @@ -3863,6 +3864,9 @@ static int perf_swevent_match(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { + if (event->cpu != -1 && event->cpu != smp_processor_id()) + return 0; + if (!perf_swevent_is_counting(event)) return 0; diff --git a/kernel/sched.c b/kernel/sched.c index 18cceeecce3..720df108a2d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -26,6 +26,8 @@ * Thomas Gleixner, Mike Kravetz */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/mm.h> #include <linux/module.h> #include <linux/nmi.h> @@ -2002,39 +2004,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, p->sched_class->prio_changed(rq, p, oldprio, running); } -/** - * kthread_bind - bind a just-created kthread to a cpu. - * @p: thread created by kthread_create(). - * @cpu: cpu (might not be online, must be possible) for @k to run on. - * - * Description: This function is equivalent to set_cpus_allowed(), - * except that @cpu doesn't need to be online, and the thread must be - * stopped (i.e., just returned from kthread_create()). - * - * Function lives here instead of kthread.c because it messes with - * scheduler internals which require locking. - */ -void kthread_bind(struct task_struct *p, unsigned int cpu) -{ - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - - raw_spin_lock_irqsave(&rq->lock, flags); - update_rq_clock(rq); - set_task_cpu(p, cpu); - p->cpus_allowed = cpumask_of_cpu(cpu); - p->rt.nr_cpus_allowed = 1; - p->flags |= PF_THREAD_BOUND; - raw_spin_unlock_irqrestore(&rq->lock, flags); -} -EXPORT_SYMBOL(kthread_bind); - #ifdef CONFIG_SMP /* * Is this task likely cache-hot: @@ -2044,6 +2013,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) { s64 delta; + if (p->sched_class != &fair_sched_class) + return 0; + /* * Buddy candidates are cache hot: */ @@ -2052,9 +2024,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) &p->se == cfs_rq_of(&p->se)->last)) return 1; - if (p->sched_class != &fair_sched_class) - return 0; - if (sysctl_sched_migration_cost == -1) return 1; if (sysctl_sched_migration_cost == 0) @@ -2065,22 +2034,24 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) return delta < (s64)sysctl_sched_migration_cost; } - void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { - int old_cpu = task_cpu(p); - struct cfs_rq *old_cfsrq = task_cfs_rq(p), - *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); +#ifdef CONFIG_SCHED_DEBUG + /* + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. + */ + WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && + !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); +#endif trace_sched_migrate_task(p, new_cpu); - if (old_cpu != new_cpu) { - p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, - 1, 1, NULL, 0); - } - p->se.vruntime -= old_cfsrq->min_vruntime - - new_cfsrq->min_vruntime; + if (task_cpu(p) == new_cpu) + return; + + p->se.nr_migrations++; + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); __set_task_cpu(p, new_cpu); } @@ -2105,13 +2076,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) /* * If the task is not on a runqueue (and not running), then - * it is sufficient to simply update the task's cpu field. + * the next wake-up will properly place the task. */ - if (!p->se.on_rq && !task_running(rq, p)) { - update_rq_clock(rq); - set_task_cpu(p, dest_cpu); + if (!p->se.on_rq && !task_running(rq, p)) return 0; - } init_completion(&req->done); req->task = p; @@ -2317,10 +2285,73 @@ void task_oncpu_function_call(struct task_struct *p, } #ifdef CONFIG_SMP +static int select_fallback_rq(int cpu, struct task_struct *p) +{ + int dest_cpu; + const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); + + /* Look for allowed, online CPU in same node. */ + for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + return dest_cpu; + + /* Any allowed, online CPU? */ + dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); + if (dest_cpu < nr_cpu_ids) + return dest_cpu; + + /* No more Mr. Nice Guy. */ + if (dest_cpu >= nr_cpu_ids) { + rcu_read_lock(); + cpuset_cpus_allowed_locked(p, &p->cpus_allowed); + rcu_read_unlock(); + dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); + + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } + } + + return dest_cpu; +} + +/* + * Called from: + * + * - fork, @p is stable because it isn't on the tasklist yet + * + * - exec, @p is unstable, retry loop + * + * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so + * we should be good. + */ static inline int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) { - return p->sched_class->select_task_rq(p, sd_flags, wake_flags); + int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); + + /* + * In order not to call set_task_cpu() on a blocking task we need + * to rely on ttwu() to place the task on a valid ->cpus_allowed + * cpu. + * + * Since this is common to all placement strategies, this lives here. + * + * [ this allows ->select_task() to simply return task_cpu(p) and + * not worry about this generic constraint ] + */ + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || + !cpu_active(cpu))) + cpu = select_fallback_rq(task_cpu(p), p); + + return cpu; } #endif @@ -2375,6 +2406,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (task_contributes_to_load(p)) rq->nr_uninterruptible--; p->state = TASK_WAKING; + + if (p->sched_class->task_waking) + p->sched_class->task_waking(rq, p); + __task_rq_unlock(rq); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); @@ -2438,8 +2473,8 @@ out_running: p->state = TASK_RUNNING; #ifdef CONFIG_SMP - if (p->sched_class->task_wake_up) - p->sched_class->task_wake_up(rq, p); + if (p->sched_class->task_woken) + p->sched_class->task_woken(rq, p); if (unlikely(rq->idle_stamp)) { u64 delta = rq->clock - rq->idle_stamp; @@ -2538,14 +2573,6 @@ static void __sched_fork(struct task_struct *p) #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); #endif - - /* - * We mark the process as running here, but have not actually - * inserted it onto the runqueue yet. This guarantees that - * nobody will actually run it, and a signal or other external - * event cannot wake it up and insert it on the runqueue either. - */ - p->state = TASK_RUNNING; } /* @@ -2556,6 +2583,12 @@ void sched_fork(struct task_struct *p, int clone_flags) int cpu = get_cpu(); __sched_fork(p); + /* + * We mark the process as waking here. This guarantees that + * nobody will actually run it, and a signal or other external + * event cannot wake it up and insert it on the runqueue either. + */ + p->state = TASK_WAKING; /* * Revert to default priority/policy on fork if requested. @@ -2624,14 +2657,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) struct rq *rq; rq = task_rq_lock(p, &flags); - BUG_ON(p->state != TASK_RUNNING); + BUG_ON(p->state != TASK_WAKING); + p->state = TASK_RUNNING; update_rq_clock(rq); activate_task(rq, p, 0); trace_sched_wakeup_new(rq, p, 1); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP - if (p->sched_class->task_wake_up) - p->sched_class->task_wake_up(rq, p); + if (p->sched_class->task_woken) + p->sched_class->task_woken(rq, p); #endif task_rq_unlock(rq, &flags); } @@ -3101,21 +3135,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) } /* - * If dest_cpu is allowed for this process, migrate the task to it. - * This is accomplished by forcing the cpu_allowed mask to only - * allow dest_cpu, which will force the cpu onto dest_cpu. Then - * the cpu_allowed mask is restored. + * sched_exec - execve() is a valuable balancing opportunity, because at + * this point the task has the smallest effective memory and cache footprint. */ -static void sched_migrate_task(struct task_struct *p, int dest_cpu) +void sched_exec(void) { + struct task_struct *p = current; struct migration_req req; + int dest_cpu, this_cpu; unsigned long flags; struct rq *rq; +again: + this_cpu = get_cpu(); + dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); + if (dest_cpu == this_cpu) { + put_cpu(); + return; + } + rq = task_rq_lock(p, &flags); + put_cpu(); + + /* + * select_task_rq() can race against ->cpus_allowed + */ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) - || unlikely(!cpu_active(dest_cpu))) - goto out; + || unlikely(!cpu_active(dest_cpu))) { + task_rq_unlock(rq, &flags); + goto again; + } /* force the process onto the specified CPU */ if (migrate_task(p, dest_cpu, &req)) { @@ -3130,24 +3179,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) return; } -out: task_rq_unlock(rq, &flags); } /* - * sched_exec - execve() is a valuable balancing opportunity, because at - * this point the task has the smallest effective memory and cache footprint. - */ -void sched_exec(void) -{ - int new_cpu, this_cpu = get_cpu(); - new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); - put_cpu(); - if (new_cpu != this_cpu) - sched_migrate_task(current, new_cpu); -} - -/* * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. */ @@ -5340,8 +5375,8 @@ static noinline void __schedule_bug(struct task_struct *prev) { struct pt_regs *regs = get_irq_regs(); - printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", - prev->comm, prev->pid, preempt_count()); + pr_err("BUG: scheduling while atomic: %s/%d/0x%08x\n", + prev->comm, prev->pid, preempt_count()); debug_show_held_locks(prev); print_modules(); @@ -5911,14 +5946,15 @@ EXPORT_SYMBOL(wait_for_completion_killable); */ bool try_wait_for_completion(struct completion *x) { + unsigned long flags; int ret = 1; - spin_lock_irq(&x->wait.lock); + spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; else x->done--; - spin_unlock_irq(&x->wait.lock); + spin_unlock_irqrestore(&x->wait.lock, flags); return ret; } EXPORT_SYMBOL(try_wait_for_completion); @@ -5933,12 +5969,13 @@ EXPORT_SYMBOL(try_wait_for_completion); */ bool completion_done(struct completion *x) { + unsigned long flags; int ret = 1; - spin_lock_irq(&x->wait.lock); + spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; - spin_unlock_irq(&x->wait.lock); + spin_unlock_irqrestore(&x->wait.lock, flags); return ret; } EXPORT_SYMBOL(completion_done); @@ -6457,7 +6494,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) return -EINVAL; retval = -ESRCH; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); if (p) { retval = security_task_getscheduler(p); @@ -6465,7 +6502,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) retval = p->policy | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } @@ -6483,7 +6520,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) if (!param || pid < 0) return -EINVAL; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); retval = -ESRCH; if (!p) @@ -6494,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) goto out_unlock; lp.sched_priority = p->rt_priority; - read_unlock(&tasklist_lock); + rcu_read_unlock(); /* * This one might sleep, we cannot do it with a spinlock held ... @@ -6504,7 +6541,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) return retval; out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } @@ -6515,22 +6552,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) int retval; get_online_cpus(); - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); put_online_cpus(); return -ESRCH; } - /* - * It is not safe to call set_cpus_allowed with the - * tasklist_lock held. We will bump the task_struct's - * usage count and then drop tasklist_lock. - */ + /* Prevent p going away */ get_task_struct(p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { retval = -ENOMEM; @@ -6616,7 +6649,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) int retval; get_online_cpus(); - read_lock(&tasklist_lock); + rcu_read_lock(); retval = -ESRCH; p = find_process_by_pid(pid); @@ -6632,7 +6665,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) task_rq_unlock(rq, &flags); out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); put_online_cpus(); return retval; @@ -6876,7 +6909,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, return -EINVAL; retval = -ESRCH; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); if (!p) goto out_unlock; @@ -6889,13 +6922,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, time_slice = p->sched_class->get_rr_interval(rq, p); task_rq_unlock(rq, &flags); - read_unlock(&tasklist_lock); + rcu_read_unlock(); jiffies_to_timespec(time_slice, &t); retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; return retval; out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } @@ -6907,23 +6940,23 @@ void sched_show_task(struct task_struct *p) unsigned state; state = p->state ? __ffs(p->state) + 1 : 0; - printk(KERN_INFO "%-13.13s %c", p->comm, + pr_info("%-13.13s %c", p->comm, state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); #if BITS_PER_LONG == 32 if (state == TASK_RUNNING) - printk(KERN_CONT " running "); + pr_cont(" running "); else - printk(KERN_CONT " %08lx ", thread_saved_pc(p)); + pr_cont(" %08lx ", thread_saved_pc(p)); #else if (state == TASK_RUNNING) - printk(KERN_CONT " running task "); + pr_cont(" running task "); else - printk(KERN_CONT " %016lx ", thread_saved_pc(p)); + pr_cont(" %016lx ", thread_saved_pc(p)); #endif #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif - printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, + pr_cont("%5lu %5d %6d 0x%08lx\n", free, task_pid_nr(p), task_pid_nr(p->real_parent), (unsigned long)task_thread_info(p)->flags); @@ -6935,11 +6968,9 @@ void show_state_filter(unsigned long state_filter) struct task_struct *g, *p; #if BITS_PER_LONG == 32 - printk(KERN_INFO - " task PC stack pid father\n"); + pr_info(" task PC stack pid father\n"); #else - printk(KERN_INFO - " task PC stack pid father\n"); + pr_info(" task PC stack pid father\n"); #endif read_lock(&tasklist_lock); do_each_thread(g, p) { @@ -6986,6 +7017,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) raw_spin_lock_irqsave(&rq->lock, flags); __sched_fork(idle); + idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); @@ -7100,7 +7132,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) struct rq *rq; int ret = 0; + /* + * Since we rely on wake-ups to migrate sleeping tasks, don't change + * the ->cpus_allowed mask from under waking tasks, which would be + * possible when we change rq->lock in ttwu(), so synchronize against + * TASK_WAKING to avoid that. + */ +again: + while (p->state == TASK_WAKING) + cpu_relax(); + rq = task_rq_lock(p, &flags); + + if (p->state == TASK_WAKING) { + task_rq_unlock(rq, &flags); + goto again; + } + if (!cpumask_intersects(new_mask, cpu_active_mask)) { ret = -EINVAL; goto out; @@ -7156,7 +7204,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) { struct rq *rq_dest, *rq_src; - int ret = 0, on_rq; + int ret = 0; if (unlikely(!cpu_active(dest_cpu))) return ret; @@ -7172,12 +7220,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) goto fail; - on_rq = p->se.on_rq; - if (on_rq) + /* + * If we're not on a rq, the next wake-up will ensure we're + * placed properly. + */ + if (p->se.on_rq) { deactivate_task(rq_src, p, 0); - - set_task_cpu(p, dest_cpu); - if (on_rq) { + set_task_cpu(p, dest_cpu); activate_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p, 0); } @@ -7273,37 +7322,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { int dest_cpu; - const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); again: - /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) - goto move; - - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); - if (dest_cpu < nr_cpu_ids) - goto move; - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpuset_cpus_allowed_locked(p, &p->cpus_allowed); - dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); - - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } - } + dest_cpu = select_fallback_rq(dead_cpu, p); -move: /* It can have affinity changed while we were choosing. */ if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) goto again; @@ -7806,48 +7828,44 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_DEBUG "%*s domain %d: ", level, "", level); if (!(sd->flags & SD_LOAD_BALANCE)) { - printk("does not load-balance\n"); + pr_cont("does not load-balance\n"); if (sd->parent) - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" - " has parent"); + pr_err("ERROR: !SD_LOAD_BALANCE domain has parent\n"); return -1; } - printk(KERN_CONT "span %s level %s\n", str, sd->name); + pr_cont("span %s level %s\n", str, sd->name); if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { - printk(KERN_ERR "ERROR: domain->span does not contain " - "CPU%d\n", cpu); + pr_err("ERROR: domain->span does not contain CPU%d\n", cpu); } if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { - printk(KERN_ERR "ERROR: domain->groups does not contain" - " CPU%d\n", cpu); + pr_err("ERROR: domain->groups does not contain CPU%d\n", cpu); } printk(KERN_DEBUG "%*s groups:", level + 1, ""); do { if (!group) { - printk("\n"); - printk(KERN_ERR "ERROR: group is NULL\n"); + pr_cont("\n"); + pr_err("ERROR: group is NULL\n"); break; } if (!group->cpu_power) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: domain->cpu_power not " - "set\n"); + pr_cont("\n"); + pr_err("ERROR: domain->cpu_power not set\n"); break; } if (!cpumask_weight(sched_group_cpus(group))) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: empty group\n"); + pr_cont("\n"); + pr_err("ERROR: empty group\n"); break; } if (cpumask_intersects(groupmask, sched_group_cpus(group))) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: repeated CPUs\n"); + pr_cont("\n"); + pr_err("ERROR: repeated CPUs\n"); break; } @@ -7855,23 +7873,21 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); - printk(KERN_CONT " %s", str); + pr_cont(" %s", str); if (group->cpu_power != SCHED_LOAD_SCALE) { - printk(KERN_CONT " (cpu_power = %d)", - group->cpu_power); + pr_cont(" (cpu_power = %d)", group->cpu_power); } group = group->next; } while (group != sd->groups); - printk(KERN_CONT "\n"); + pr_cont("\n"); if (!cpumask_equal(sched_domain_span(sd), groupmask)) - printk(KERN_ERR "ERROR: groups don't span domain->span\n"); + pr_err("ERROR: groups don't span domain->span\n"); if (sd->parent && !cpumask_subset(groupmask, sched_domain_span(sd->parent))) - printk(KERN_ERR "ERROR: parent span is not a superset " - "of domain->span\n"); + pr_err("ERROR: parent span is not a superset of domain->span\n"); return 0; } @@ -8427,8 +8443,7 @@ static int build_numa_sched_groups(struct s_data *d, sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, num); if (!sg) { - printk(KERN_WARNING "Can not alloc domain group for node %d\n", - num); + pr_warning("Can not alloc domain group for node %d\n", num); return -ENOMEM; } d->sched_group_nodes[num] = sg; @@ -8457,8 +8472,8 @@ static int build_numa_sched_groups(struct s_data *d, sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, num); if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", j); + pr_warning("Can not alloc domain group for node %d\n", + j); return -ENOMEM; } sg->cpu_power = 0; @@ -8686,7 +8701,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, d->sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *), GFP_KERNEL); if (!d->sched_group_nodes) { - printk(KERN_WARNING "Can not alloc sched group node list\n"); + pr_warning("Can not alloc sched group node list\n"); return sa_notcovered; } sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes; @@ -8703,7 +8718,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, return sa_send_covered; d->rd = alloc_rootdomain(); if (!d->rd) { - printk(KERN_WARNING "Cannot alloc root domain\n"); + pr_warning("Cannot alloc root domain\n"); return sa_tmpmask; } return sa_rootdomain; @@ -9668,7 +9683,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP static inline int preempt_count_equals(int preempt_offset) { - int nested = preempt_count() & ~PREEMPT_ACTIVE; + int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); } @@ -9685,13 +9700,11 @@ void __might_sleep(char *file, int line, int preempt_offset) return; prev_jiffy = jiffies; - printk(KERN_ERR - "BUG: sleeping function called from invalid context at %s:%d\n", - file, line); - printk(KERN_ERR - "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), - current->pid, current->comm); + pr_err("BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + pr_err("in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); debug_show_held_locks(current); if (irqs_disabled()) @@ -10083,7 +10096,7 @@ void sched_move_task(struct task_struct *tsk) #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->moved_group) - tsk->sched_class->moved_group(tsk); + tsk->sched_class->moved_group(tsk, on_rq); #endif if (unlikely(running)) diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 479ce5682d7..5b496132c28 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); +unsigned long long cpu_clock(int cpu) +{ + unsigned long long clock; + unsigned long flags; + + local_irq_save(flags); + clock = sched_clock_cpu(cpu); + local_irq_restore(flags); + + return clock; +} + #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ void sched_clock_init(void) @@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } -#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ unsigned long long cpu_clock(int cpu) { - unsigned long long clock; - unsigned long flags; + return sched_clock_cpu(cpu); +} - local_irq_save(flags); - clock = sched_clock_cpu(cpu); - local_irq_restore(flags); +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ - return clock; -} EXPORT_SYMBOL_GPL(cpu_clock); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5bedf6e3ebf..42ac3c9f66f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq, exec_clock, delta_exec); delta_exec_weighted = calc_delta_fair(delta_exec, curr); + curr->vruntime += delta_exec_weighted; update_min_vruntime(cfs_rq); } @@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) se->vruntime = vruntime; } +#define ENQUEUE_WAKEUP 1 +#define ENQUEUE_MIGRATE 2 + static void -enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) +enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { /* + * Update the normalized vruntime before updating min_vruntime + * through callig update_curr(). + */ + if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE)) + se->vruntime += cfs_rq->min_vruntime; + + /* * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); account_entity_enqueue(cfs_rq, se); - if (wakeup) { + if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); enqueue_sleeper(cfs_rq, se); } @@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); + + /* + * Normalize the entity after updating the min_vruntime because the + * update can refer to the ->curr item and we need to reflect this + * movement in our normalized position. + */ + if (!sleep) + se->vruntime -= cfs_rq->min_vruntime; } /* @@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + int flags = 0; + + if (wakeup) + flags |= ENQUEUE_WAKEUP; + if (p->state == TASK_WAKING) + flags |= ENQUEUE_MIGRATE; for_each_sched_entity(se) { if (se->on_rq) break; cfs_rq = cfs_rq_of(se); - enqueue_entity(cfs_rq, se, wakeup); - wakeup = 1; + enqueue_entity(cfs_rq, se, flags); + flags = ENQUEUE_WAKEUP; } hrtick_update(rq); @@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq) #ifdef CONFIG_SMP +static void task_waking_fair(struct rq *rq, struct task_struct *p) +{ + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + se->vruntime -= cfs_rq->min_vruntime; +} + #ifdef CONFIG_FAIR_GROUP_SCHED /* * effective_load() calculates the load change as seen from the root_task_group @@ -1429,6 +1462,9 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag } for_each_domain(cpu, tmp) { + if (!(tmp->flags & SD_LOAD_BALANCE)) + continue; + /* * If power savings logic is enabled for a domain, see if we * are not overloaded, if so, don't balance wider. @@ -1975,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p) resched_task(rq->curr); } + se->vruntime -= cfs_rq->min_vruntime; + raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -2028,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq) } #ifdef CONFIG_FAIR_GROUP_SCHED -static void moved_group_fair(struct task_struct *p) +static void moved_group_fair(struct task_struct *p, int on_rq) { struct cfs_rq *cfs_rq = task_cfs_rq(p); update_curr(cfs_rq); - place_entity(cfs_rq, &p->se, 1); + if (!on_rq) + place_entity(cfs_rq, &p->se, 1); } #endif @@ -2073,6 +2112,8 @@ static const struct sched_class fair_sched_class = { .move_one_task = move_one_task_fair, .rq_online = rq_online_fair, .rq_offline = rq_offline_fair, + + .task_waking = task_waking_fair, #endif .set_curr_task = set_curr_task_fair, diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 5f93b570d38..21b969a2872 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -35,7 +35,7 @@ static void dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) { raw_spin_unlock_irq(&rq->lock); - printk(KERN_ERR "bad: scheduling from the idle thread!\n"); + pr_err("bad: scheduling from the idle thread!\n"); dump_stack(); raw_spin_lock_irq(&rq->lock); } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d2ea2828164..f48328ac216 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq) * If we are not running and we are not going to reschedule soon, we should * try to push tasks away now */ -static void task_wake_up_rt(struct rq *rq, struct task_struct *p) +static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && @@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = { .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, - .task_wake_up = task_wake_up_rt, + .task_woken = task_woken_rt, .switched_from = switched_from_rt, #endif diff --git a/kernel/signal.c b/kernel/signal.c index 1814e68e4de..d09692b4037 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi struct user_struct *user; /* - * We won't get problems with the target's UID changing under us - * because changing it requires RCU be used, and if t != current, the - * caller must be holding the RCU readlock (by way of a spinlock) and - * we use RCU protection here + * Protect access to @t credentials. This can go away when all + * callers hold rcu read lock. */ + rcu_read_lock(); user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); + rcu_read_unlock(); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -1179,11 +1179,12 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, int ret = -EINVAL; struct task_struct *p; const struct cred *pcred; + unsigned long flags; if (!valid_signal(sig)) return ret; - read_lock(&tasklist_lock); + rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (!p) { ret = -ESRCH; @@ -1199,14 +1200,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, ret = security_task_kill(p, info, sig, secid); if (ret) goto out_unlock; - if (sig && p->sighand) { - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - ret = __send_signal(sig, info, p, 1, 0); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + + if (sig) { + if (lock_task_sighand(p, &flags)) { + ret = __send_signal(sig, info, p, 1, 0); + unlock_task_sighand(p, &flags); + } else + ret = -ESRCH; } out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); diff --git a/kernel/sys.c b/kernel/sys.c index 20ccfb5da6a..26a6b73a6b8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -162,6 +162,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) if (niceval > 19) niceval = 19; + rcu_read_lock(); read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: @@ -199,6 +200,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) } out_unlock: read_unlock(&tasklist_lock); + rcu_read_unlock(); out: return error; } diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3d5fc0fd1cc..6f740d9f094 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -238,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old, */ void clockevents_notify(unsigned long reason, void *arg) { - struct list_head *node, *tmp; + struct clock_event_device *dev, *tmp; unsigned long flags; + int cpu; raw_spin_lock_irqsave(&clockevents_lock, flags); clockevents_do_notify(reason, arg); @@ -250,8 +251,19 @@ void clockevents_notify(unsigned long reason, void *arg) * Unregister the clock event devices which were * released from the users in the notify chain. */ - list_for_each_safe(node, tmp, &clockevents_released) - list_del(node); + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + cpu = *((int *)arg); + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1) { + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + list_del(&dev->list); + } + } break; default: break; diff --git a/kernel/timer.c b/kernel/timer.c index 5db5a8d2681..15533b79239 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = __get_cpu_var(tvec_bases); - cpu = smp_processor_id(); #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ecab06547a..375f81a568d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -282,6 +282,18 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); +/* Check the name is good for event/group */ +static int check_event_name(const char *name) +{ + if (!isalpha(*name) && *name != '_') + return 0; + while (*++name != '\0') { + if (!isalpha(*name) && !isdigit(*name) && *name != '_') + return 0; + } + return 1; +} + /* * Allocate new trace_probe and initialize it (including kprobes). */ @@ -293,10 +305,11 @@ static struct trace_probe *alloc_trace_probe(const char *group, int nargs, int is_return) { struct trace_probe *tp; + int ret = -ENOMEM; tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); if (!tp) - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); if (symbol) { tp->symbol = kstrdup(symbol, GFP_KERNEL); @@ -312,14 +325,20 @@ static struct trace_probe *alloc_trace_probe(const char *group, else tp->rp.kp.pre_handler = kprobe_dispatcher; - if (!event) + if (!event || !check_event_name(event)) { + ret = -EINVAL; goto error; + } + tp->call.name = kstrdup(event, GFP_KERNEL); if (!tp->call.name) goto error; - if (!group) + if (!group || !check_event_name(group)) { + ret = -EINVAL; goto error; + } + tp->call.system = kstrdup(group, GFP_KERNEL); if (!tp->call.system) goto error; @@ -330,7 +349,7 @@ error: kfree(tp->call.name); kfree(tp->symbol); kfree(tp); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } static void free_probe_arg(struct probe_arg *arg) @@ -695,10 +714,10 @@ static int create_trace_probe(int argc, char **argv) if (!event) { /* Make a new event name */ if (symbol) - snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", + snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", is_return ? 'r' : 'p', symbol, offset); else - snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", + snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", is_return ? 'r' : 'p', addr); event = buf; } diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index f6693969287..a7974a552ca 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, + .walk_stack = print_context_stack, }; static int diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 76074209f9a..a4e971dee10 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -637,6 +637,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, /* Allocate bunzip_data. Most fields initialize to zero. */ bd = *bdp = malloc(i); + if (!bd) + return RETVAL_OUT_OF_MEMORY; memset(bd, 0, sizeof(struct bunzip_data)); /* Setup input buffer */ bd->inbuf = inbuf; @@ -664,6 +666,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, bd->dbufSize = 100000*(i-BZh0); bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); + if (!bd->dbuf) + return RETVAL_OUT_OF_MEMORY; return RETVAL_OK; } @@ -686,7 +690,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, if (!outbuf) { error("Could not allocate output bufer"); - return -1; + return RETVAL_OUT_OF_MEMORY; } if (buf) inbuf = buf; @@ -694,6 +698,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, inbuf = malloc(BZIP2_IOBUF_SIZE); if (!inbuf) { error("Could not allocate input bufer"); + i = RETVAL_OUT_OF_MEMORY; goto exit_0; } i = start_bunzip(&bd, inbuf, len, fill); @@ -720,11 +725,14 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { error("Compressed file ends unexpectedly"); } + if (!bd) + goto exit_1; if (bd->dbuf) large_free(bd->dbuf); if (pos) *pos = bd->inbufPos; free(bd); +exit_1: if (!buf) free(inbuf); exit_0: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 74af449b1f1..4e869657cb5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3579,7 +3579,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, * then all holes in the requested range will be accounted for. */ -static unsigned long __meminit __absent_pages_in_range(int nid, +unsigned long __meminit __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { @@ -4108,7 +4108,7 @@ static int __init cmp_node_active_region(const void *a, const void *b) } /* sort the node_map by start_pfn */ -static void __init sort_node_map(void) +void __init sort_node_map(void) { sort(early_node_map, (size_t)nr_nodemap_entries, sizeof(struct node_active_region), diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7814dbbd401..4390d225686 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -487,10 +487,11 @@ else msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); endif -ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifneq ($(shell sh -c "(echo '\#ifndef _MIPS_SZLONG'; echo '\#define _MIPS_SZLONG 0'; echo '\#endif'; echo '\#include <dwarf.h>'; echo '\#include <libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -I/usr/include/libdwarf -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); BASIC_CFLAGS += -DNO_LIBDWARF else + BASIC_CFLAGS += -I/usr/include/libdwarf EXTLIBS += -lelf -ldwarf LIB_OBJS += util/probe-finder.o endif diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 7e741f54d79..c1e6774fd3e 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -38,6 +38,7 @@ #include "util/strlist.h" #include "util/event.h" #include "util/debug.h" +#include "util/debugfs.h" #include "util/symbol.h" #include "util/thread.h" #include "util/session.h" @@ -205,6 +206,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if ((!session.nr_probe && !session.dellist && !session.list_events)) usage_with_options(probe_usage, options); + if (debugfs_valid_mountpoint(debugfs_path) < 0) + die("Failed to find debugfs path."); + if (session.list_events) { if (session.nr_probe != 0 || session.dellist) { pr_warning(" Error: Don't use --list with" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e50a6b10ee6..5c2ab5357ec 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -224,7 +224,7 @@ static int __cmd_report(void) perf_session__collapse_resort(session); perf_session__output_resort(session, session->events_stats.total); - fprintf(stdout, "# Samples: %ld\n#\n", session->events_stats.total); + fprintf(stdout, "# Samples: %Ld\n#\n", session->events_stats.total); perf_session__fprintf_hists(session, NULL, false, stdout); if (sort_order == default_sort_order && parent_pattern == default_parent_pattern) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8027309b042..690a96d0467 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -95,8 +95,8 @@ typedef union event_union { } event_t; struct events_stats { - unsigned long total; - unsigned long lost; + u64 total; + u64 lost; }; void event__print_totals(void); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2ca62154f79..29465d44004 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -62,6 +62,18 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) return ret; } +/* Check the name is good for event/group */ +static bool check_event_name(const char *name) +{ + if (!isalpha(*name) && *name != '_') + return false; + while (*++name != '\0') { + if (!isalpha(*name) && !isdigit(*name) && *name != '_') + return false; + } + return true; +} + /* Parse probepoint definition. */ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) { @@ -82,6 +94,9 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) ptr = strchr(arg, ':'); if (ptr) /* Group name is not supported yet. */ semantic_error("Group name is not supported yet."); + if (!check_event_name(arg)) + semantic_error("%s is bad for event name -it must " + "follow C symbol-naming rule.", arg); pp->event = strdup(arg); arg = tmp; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 5e4050ce296..a4086aaddb7 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -1,9 +1,9 @@ #ifndef _PROBE_FINDER_H #define _PROBE_FINDER_H -#define MAX_PATH_LEN 256 -#define MAX_PROBE_BUFFER 1024 -#define MAX_PROBES 128 +#define MAX_PATH_LEN 256 +#define MAX_PROBE_BUFFER 1024 +#define MAX_PROBES 128 static inline int is_c_varname(const char *name) { @@ -12,48 +12,53 @@ static inline int is_c_varname(const char *name) } struct probe_point { - char *event; /* Event name */ - char *group; /* Event group */ + char *event; /* Event name */ + char *group; /* Event group */ /* Inputs */ - char *file; /* File name */ - int line; /* Line number */ + char *file; /* File name */ + int line; /* Line number */ - char *function; /* Function name */ - int offset; /* Offset bytes */ + char *function; /* Function name */ + int offset; /* Offset bytes */ - int nr_args; /* Number of arguments */ - char **args; /* Arguments */ + int nr_args; /* Number of arguments */ + char **args; /* Arguments */ - int retprobe; /* Return probe */ + int retprobe; /* Return probe */ /* Output */ - int found; /* Number of found probe points */ - char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ + int found; /* Number of found probe points */ + char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ }; #ifndef NO_LIBDWARF extern int find_probepoint(int fd, struct probe_point *pp); -#include <libdwarf/dwarf.h> -#include <libdwarf/libdwarf.h> +/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */ +#ifndef _MIPS_SZLONG +# define _MIPS_SZLONG 0 +#endif + +#include <dwarf.h> +#include <libdwarf.h> struct probe_finder { - struct probe_point *pp; /* Target probe point */ + struct probe_point *pp; /* Target probe point */ /* For function searching */ - Dwarf_Addr addr; /* Address */ - Dwarf_Unsigned fno; /* File number */ - Dwarf_Unsigned lno; /* Line number */ - Dwarf_Off inl_offs; /* Inline offset */ - Dwarf_Die cu_die; /* Current CU */ + Dwarf_Addr addr; /* Address */ + Dwarf_Unsigned fno; /* File number */ + Dwarf_Unsigned lno; /* Line number */ + Dwarf_Off inl_offs; /* Inline offset */ + Dwarf_Die cu_die; /* Current CU */ /* For variable searching */ - Dwarf_Addr cu_base; /* Current CU base address */ - Dwarf_Locdesc fbloc; /* Location of Current Frame Base */ - const char *var; /* Current variable name */ - char *buf; /* Current output buffer */ - int len; /* Length of output buffer */ + Dwarf_Addr cu_base; /* Current CU base address */ + Dwarf_Locdesc fbloc; /* Location of Current Frame Base */ + const char *var; /* Current variable name */ + char *buf; /* Current output buffer */ + int len; /* Length of output buffer */ }; #endif /* NO_LIBDWARF */ |