diff options
109 files changed, 5446 insertions, 1239 deletions
@@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86_64 3.4.3 Kernel Configuration +# Linux/x86_64 3.4.4 Kernel Configuration # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -49,6 +49,7 @@ CONFIG_X86_HT=y CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" # CONFIG_KTIME_SCALAR is not set CONFIG_ARCH_CPU_PROBE_RELEASE=y +CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_HAVE_IRQ_WORK=y CONFIG_IRQ_WORK=y @@ -196,6 +197,7 @@ CONFIG_OPROFILE_NMI_TIMER=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y +CONFIG_UPROBES=y CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_KRETPROBES=y CONFIG_USER_RETURN_NOTIFIER=y @@ -3523,7 +3525,6 @@ CONFIG_VGA_ARB=y CONFIG_VGA_ARB_MAX_GPUS=16 CONFIG_VGA_SWITCHEROO=y CONFIG_DRM=m -CONFIG_DRM_USB=m CONFIG_DRM_KMS_HELPER=m # CONFIG_DRM_LOAD_EDID_FIRMWARE is not set CONFIG_DRM_TTM=m @@ -3552,7 +3553,7 @@ CONFIG_DRM_VMWGFX=m CONFIG_DRM_GMA500=m # CONFIG_DRM_GMA600 is not set CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set # CONFIG_STUB_POULSBO is not set CONFIG_VGASTATE=m CONFIG_VIDEO_OUTPUT_CONTROL=m @@ -4860,6 +4861,8 @@ CONFIG_BRANCH_PROFILE_NONE=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y +CONFIG_PROBE_EVENTS=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FUNCTION_PROFILER=y CONFIG_FTRACE_MCOUNT_RECORD=y diff --git a/.config.old b/.config.old index f7923ec98e3..daa927f55a7 100644 --- a/.config.old +++ b/.config.old @@ -2447,7 +2447,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4063,6 +4063,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt new file mode 100644 index 00000000000..24ce6823a09 --- /dev/null +++ b/Documentation/trace/uprobetracer.txt @@ -0,0 +1,113 @@ + Uprobe-tracer: Uprobe-based Event Tracing + ========================================= + Documentation written by Srikar Dronamraju + +Overview +-------- +Uprobe based trace events are similar to kprobe based trace events. +To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y. + +Similar to the kprobe-event tracer, this doesn't need to be activated via +current_tracer. Instead of that, add probe points via +/sys/kernel/debug/tracing/uprobe_events, and enable it via +/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled. + +However unlike kprobe-event tracer, the uprobe event interface expects the +user to calculate the offset of the probepoint in the object + +Synopsis of uprobe_tracer +------------------------- + p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe + + GRP : Group name. If omitted, use "uprobes" for it. + EVENT : Event name. If omitted, the event name is generated + based on SYMBOL+offs. + PATH : path to an executable or a library. + SYMBOL[+offs] : Symbol+offset where the probe is inserted. + + FETCHARGS : Arguments. Each probe can have up to 128 args. + %REG : Fetch register REG + +Event Profiling +--------------- + You can check the total number of probe hits and probe miss-hits via +/sys/kernel/debug/tracing/uprobe_profile. + The first column is event name, the second is the number of probe hits, +the third is the number of probe miss-hits. + +Usage examples +-------------- +To add a probe as a new event, write a new definition to uprobe_events +as below. + + echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events + + This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash + + echo > /sys/kernel/debug/tracing/uprobe_events + + This clears all probe points. + +The following example shows how to dump the instruction pointer and %ax +a register at the probed text address. Here we are trying to probe +function zfree in /bin/zsh + + # cd /sys/kernel/debug/tracing/ + # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp + 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh + # objdump -T /bin/zsh | grep -w zfree + 0000000000446420 g DF .text 0000000000000012 Base zfree + +0x46420 is the offset of zfree in object /bin/zsh that is loaded at +0x00400000. Hence the command to probe would be : + + # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events + +Please note: User has to explicitly calculate the offset of the probepoint +in the object. We can see the events that are registered by looking at the +uprobe_events file. + + # cat uprobe_events + p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax + +The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format + + # cat events/uprobes/p_zsh_0x46420/format + name: p_zsh_0x46420 + ID: 922 + format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + field:int common_padding; offset:8; size:4; signed:1; + + field:unsigned long __probe_ip; offset:12; size:4; signed:0; + field:u32 arg1; offset:16; size:4; signed:0; + field:u32 arg2; offset:20; size:4; signed:0; + + print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2 + +Right after definition, each event is disabled by default. For tracing these +events, you need to enable it by: + + # echo 1 > events/uprobes/enable + +Lets disable the event after sleeping for some time. + # sleep 20 + # echo 0 > events/uprobes/enable + +And you can see the traced information via /sys/kernel/debug/tracing/trace. + + # cat trace + # tracer: nop + # + # TASK-PID CPU# TIMESTAMP FUNCTION + # | | | | | + zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 + zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 + zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 + zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 + +Each line shows us probes were triggered for a pid 24842 with ip being +0x446421 and contents of ax register being 79. @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 4 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Saber-toothed Squirrel diff --git a/arch/Kconfig b/arch/Kconfig index 684eb5af439..2880abf8a26 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -76,6 +76,23 @@ config OPTPROBES depends on KPROBES && HAVE_OPTPROBES depends on !PREEMPT +config UPROBES + bool "Transparent user-space probes (EXPERIMENTAL)" + depends on UPROBE_EVENT && PERF_EVENTS + default n + help + Uprobes is the user-space counterpart to kprobes: they + enable instrumentation applications (such as 'perf probe') + to establish unintrusive probes in user-space binaries and + libraries, by executing handler functions when the probes + are hit by user-space applications. + + ( These probes come in the form of single-byte breakpoints, + managed by the kernel and kept transparent to the probed + application. ) + + If in doubt, say "N". + config HAVE_EFFICIENT_UNALIGNED_ACCESS bool help diff --git a/arch/arm/mach-imx/crm-regs-imx5.h b/arch/arm/mach-imx/crm-regs-imx5.h index 5e11ba7daee..5e3f1f0f4ca 100644 --- a/arch/arm/mach-imx/crm-regs-imx5.h +++ b/arch/arm/mach-imx/crm-regs-imx5.h @@ -23,7 +23,7 @@ #define MX53_DPLL1_BASE MX53_IO_ADDRESS(MX53_PLL1_BASE_ADDR) #define MX53_DPLL2_BASE MX53_IO_ADDRESS(MX53_PLL2_BASE_ADDR) #define MX53_DPLL3_BASE MX53_IO_ADDRESS(MX53_PLL3_BASE_ADDR) -#define MX53_DPLL4_BASE MX53_IO_ADDRESS(MX53_PLL3_BASE_ADDR) +#define MX53_DPLL4_BASE MX53_IO_ADDRESS(MX53_PLL4_BASE_ADDR) /* PLL Register Offsets */ #define MXC_PLL_DP_CTL 0x00 diff --git a/arch/arm/mach-imx/hotplug.c b/arch/arm/mach-imx/hotplug.c index 89493abd497..20ed2d56c1a 100644 --- a/arch/arm/mach-imx/hotplug.c +++ b/arch/arm/mach-imx/hotplug.c @@ -12,6 +12,7 @@ #include <linux/errno.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> #include <mach/common.h> int platform_cpu_kill(unsigned int cpu) @@ -19,6 +20,44 @@ int platform_cpu_kill(unsigned int cpu) return 1; } +static inline void cpu_enter_lowpower(void) +{ + unsigned int v; + + flush_cache_all(); + asm volatile( + "mcr p15, 0, %1, c7, c5, 0\n" + " mcr p15, 0, %1, c7, c10, 4\n" + /* + * Turn off coherency + */ + " mrc p15, 0, %0, c1, c0, 1\n" + " bic %0, %0, %3\n" + " mcr p15, 0, %0, c1, c0, 1\n" + " mrc p15, 0, %0, c1, c0, 0\n" + " bic %0, %0, %2\n" + " mcr p15, 0, %0, c1, c0, 0\n" + : "=&r" (v) + : "r" (0), "Ir" (CR_C), "Ir" (0x40) + : "cc"); +} + +static inline void cpu_leave_lowpower(void) +{ + unsigned int v; + + asm volatile( + "mrc p15, 0, %0, c1, c0, 0\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c1, c0, 0\n" + " mrc p15, 0, %0, c1, c0, 1\n" + " orr %0, %0, %2\n" + " mcr p15, 0, %0, c1, c0, 1\n" + : "=&r" (v) + : "Ir" (CR_C), "Ir" (0x40) + : "cc"); +} + /* * platform-specific code to shutdown a CPU * @@ -26,9 +65,10 @@ int platform_cpu_kill(unsigned int cpu) */ void platform_cpu_die(unsigned int cpu) { - flush_cache_all(); + cpu_enter_lowpower(); imx_enable_cpu(cpu, false); cpu_do_idle(); + cpu_leave_lowpower(); /* We should never return from idle */ panic("cpu %d unexpectedly exit from shutdown\n", cpu); diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c index e432d4acee1..4460d25faae 100644 --- a/arch/arm/mach-imx/mach-mx21ads.c +++ b/arch/arm/mach-imx/mach-mx21ads.c @@ -32,7 +32,7 @@ * Memory-mapped I/O on MX21ADS base board */ #define MX21ADS_MMIO_BASE_ADDR 0xf5000000 -#define MX21ADS_MMIO_SIZE SZ_16M +#define MX21ADS_MMIO_SIZE 0xc00000 #define MX21ADS_REG_ADDR(offset) (void __force __iomem *) \ (MX21ADS_MMIO_BASE_ADDR + (offset)) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 51010bfc792..102abd6dbc0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -99,6 +99,9 @@ static inline void hard_irq_disable(void) get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; } +/* include/linux/interrupt.h needs hard_irq_disable to be a macro */ +#define hard_irq_disable hard_irq_disable + /* * This is called by asynchronous interrupts to conditionally * re-enable hard interrupts when soft-disabled after having diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9866b0b77d..1f5c30703ad 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -84,7 +84,7 @@ config X86 select DCACHE_WORD_ACCESS config INSTRUCTION_DECODER - def_bool (KPROBES || PERF_EVENTS) + def_bool (KPROBES || PERF_EVENTS || UPROBES) config OUTPUT_FORMAT string @@ -243,6 +243,9 @@ config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU +config ARCH_SUPPORTS_UPROBES + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6df8ccd71..0710c11305d 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -85,6 +85,7 @@ struct thread_info { #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ +#define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -109,6 +110,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) +#define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h new file mode 100644 index 00000000000..f3971bbcd1d --- /dev/null +++ b/arch/x86/include/asm/uprobes.h @@ -0,0 +1,57 @@ +#ifndef _ASM_UPROBES_H +#define _ASM_UPROBES_H +/* + * User-space Probes (UProbes) for x86 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2008-2011 + * Authors: + * Srikar Dronamraju + * Jim Keniston + */ + +#include <linux/notifier.h> + +typedef u8 uprobe_opcode_t; + +#define MAX_UINSN_BYTES 16 +#define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */ + +#define UPROBE_SWBP_INSN 0xcc +#define UPROBE_SWBP_INSN_SIZE 1 + +struct arch_uprobe { + u16 fixups; + u8 insn[MAX_UINSN_BYTES]; +#ifdef CONFIG_X86_64 + unsigned long rip_rela_target_address; +#endif +}; + +struct arch_uprobe_task { + unsigned long saved_trap_nr; +#ifdef CONFIG_X86_64 + unsigned long saved_scratch_register; +#endif +}; + +extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); +extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); +extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +#endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 532d2e090e6..d23d83577d6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -101,6 +101,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o +obj-$(CONFIG_UPROBES) += uprobes.o ### # 64 bit specific files diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index cf1178332bc..4609190683f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1415,6 +1415,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } +/* + * This is declared in linux/regset.h and defined in machine-dependent + * code. We put the export here to ensure no machine forgets it. + */ +EXPORT_SYMBOL_GPL(task_user_regset_view); + static void fill_sigtrap_info(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code, diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 115eac43148..041af2fd088 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -18,6 +18,7 @@ #include <linux/personality.h> #include <linux/uaccess.h> #include <linux/user-return-notifier.h> +#include <linux/uprobes.h> #include <asm/processor.h> #include <asm/ucontext.h> @@ -824,6 +825,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ + if (thread_info_flags & _TIF_UPROBE) { + clear_thread_flag(TIF_UPROBE); + uprobe_notify_resume(regs); + } + /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c new file mode 100644 index 00000000000..36fd42091fa --- /dev/null +++ b/arch/x86/kernel/uprobes.c @@ -0,0 +1,675 @@ +/* + * User-space Probes (UProbes) for x86 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2008-2011 + * Authors: + * Srikar Dronamraju + * Jim Keniston + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/uprobes.h> +#include <linux/uaccess.h> + +#include <linux/kdebug.h> +#include <asm/processor.h> +#include <asm/insn.h> + +/* Post-execution fixups. */ + +/* No fixup needed */ +#define UPROBE_FIX_NONE 0x0 + +/* Adjust IP back to vicinity of actual insn */ +#define UPROBE_FIX_IP 0x1 + +/* Adjust the return address of a call insn */ +#define UPROBE_FIX_CALL 0x2 + +#define UPROBE_FIX_RIP_AX 0x8000 +#define UPROBE_FIX_RIP_CX 0x4000 + +#define UPROBE_TRAP_NR UINT_MAX + +/* Adaptations for mhiramat x86 decoder v14. */ +#define OPCODE1(insn) ((insn)->opcode.bytes[0]) +#define OPCODE2(insn) ((insn)->opcode.bytes[1]) +#define OPCODE3(insn) ((insn)->opcode.bytes[2]) +#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) + +#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ + (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ + (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ + (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ + (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ + << (row % 32)) + +/* + * Good-instruction tables for 32-bit apps. This is non-const and volatile + * to keep gcc from statically optimizing it out, as variable_test_bit makes + * some versions of gcc to think only *(unsigned long*) is used. + */ +static volatile u32 good_insns_32[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ + W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ + W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ + W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ + W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ + W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ + W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; + +/* Using this for both 64-bit and 32-bit apps */ +static volatile u32 good_2byte_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ + W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ + W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ + W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ + W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ + W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ + W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; + +#ifdef CONFIG_X86_64 +/* Good-instruction tables for 64-bit apps */ +static volatile u32 good_insns_64[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ + W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ + W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ + W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ + W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ + W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ + W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; +#endif +#undef W + +/* + * opcodes we'll probably never support: + * + * 6c-6d, e4-e5, ec-ed - in + * 6e-6f, e6-e7, ee-ef - out + * cc, cd - int3, int + * cf - iret + * d6 - illegal instruction + * f1 - int1/icebp + * f4 - hlt + * fa, fb - cli, sti + * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 + * + * invalid opcodes in 64-bit mode: + * + * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 + * 63 - we support this opcode in x86_64 but not in i386. + * + * opcodes we may need to refine support for: + * + * 0f - 2-byte instructions: For many of these instructions, the validity + * depends on the prefix and/or the reg field. On such instructions, we + * just consider the opcode combination valid if it corresponds to any + * valid instruction. + * + * 8f - Group 1 - only reg = 0 is OK + * c6-c7 - Group 11 - only reg = 0 is OK + * d9-df - fpu insns with some illegal encodings + * f2, f3 - repnz, repz prefixes. These are also the first byte for + * certain floating-point instructions, such as addsd. + * + * fe - Group 4 - only reg = 0 or 1 is OK + * ff - Group 5 - only reg = 0-6 is OK + * + * others -- Do we need to support these? + * + * 0f - (floating-point?) prefetch instructions + * 07, 17, 1f - pop es, pop ss, pop ds + * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- + * but 64 and 65 (fs: and gs:) seem to be used, so we support them + * 67 - addr16 prefix + * ce - into + * f0 - lock prefix + */ + +/* + * TODO: + * - Where necessary, examine the modrm byte and allow only valid instructions + * in the different Groups and fpu instructions. + */ + +static bool is_prefix_bad(struct insn *insn) +{ + int i; + + for (i = 0; i < insn->prefixes.nbytes; i++) { + switch (insn->prefixes.bytes[i]) { + case 0x26: /* INAT_PFX_ES */ + case 0x2E: /* INAT_PFX_CS */ + case 0x36: /* INAT_PFX_DS */ + case 0x3E: /* INAT_PFX_SS */ + case 0xF0: /* INAT_PFX_LOCK */ + return true; + } + } + return false; +} + +static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) +{ + insn_init(insn, auprobe->insn, false); + + /* Skip good instruction prefixes; reject "bad" ones. */ + insn_get_opcode(insn); + if (is_prefix_bad(insn)) + return -ENOTSUPP; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) + return 0; + + if (insn->opcode.nbytes == 2) { + if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) + return 0; + } + + return -ENOTSUPP; +} + +/* + * Figure out which fixups arch_uprobe_post_xol() will need to perform, and + * annotate arch_uprobe->fixups accordingly. To start with, + * arch_uprobe->fixups is either zero or it reflects rip-related fixups. + */ +static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) +{ + bool fix_ip = true, fix_call = false; /* defaults */ + int reg; + + insn_get_opcode(insn); /* should be a nop */ + + switch (OPCODE1(insn)) { + case 0xc3: /* ret/lret */ + case 0xcb: + case 0xc2: + case 0xca: + /* ip is correct */ + fix_ip = false; + break; + case 0xe8: /* call relative - Fix return addr */ + fix_call = true; + break; + case 0x9a: /* call absolute - Fix return addr, not ip */ + fix_call = true; + fix_ip = false; + break; + case 0xff: + insn_get_modrm(insn); + reg = MODRM_REG(insn); + if (reg == 2 || reg == 3) { + /* call or lcall, indirect */ + /* Fix return addr; ip is correct. */ + fix_call = true; + fix_ip = false; + } else if (reg == 4 || reg == 5) { + /* jmp or ljmp, indirect */ + /* ip is correct. */ + fix_ip = false; + } + break; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip = false; + break; + default: + break; + } + if (fix_ip) + auprobe->fixups |= UPROBE_FIX_IP; + if (fix_call) + auprobe->fixups |= UPROBE_FIX_CALL; +} + +#ifdef CONFIG_X86_64 +/* + * If arch_uprobe->insn doesn't use rip-relative addressing, return + * immediately. Otherwise, rewrite the instruction so that it accesses + * its memory operand indirectly through a scratch register. Set + * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address + * accordingly. (The contents of the scratch register will be saved + * before we single-step the modified instruction, and restored + * afterward.) + * + * We do this because a rip-relative instruction can access only a + * relatively small area (+/- 2 GB from the instruction), and the XOL + * area typically lies beyond that area. At least for instructions + * that store to memory, we can't execute the original instruction + * and "fix things up" later, because the misdirected store could be + * disastrous. + * + * Some useful facts about rip-relative instructions: + * + * - There's always a modrm byte. + * - There's never a SIB byte. + * - The displacement is always 4 bytes. + */ +static void +handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +{ + u8 *cursor; + u8 reg; + + if (mm->context.ia32_compat) + return; + + auprobe->rip_rela_target_address = 0x0; + if (!insn_rip_relative(insn)) + return; + + /* + * insn_rip_relative() would have decoded rex_prefix, modrm. + * Clear REX.b bit (extension of MODRM.rm field): + * we want to encode rax/rcx, not r8/r9. + */ + if (insn->rex_prefix.nbytes) { + cursor = auprobe->insn + insn_offset_rex_prefix(insn); + *cursor &= 0xfe; /* Clearing REX.B bit */ + } + + /* + * Point cursor at the modrm byte. The next 4 bytes are the + * displacement. Beyond the displacement, for some instructions, + * is the immediate operand. + */ + cursor = auprobe->insn + insn_offset_modrm(insn); + insn_get_length(insn); + + /* + * Convert from rip-relative addressing to indirect addressing + * via a scratch register. Change the r/m field from 0x5 (%rip) + * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. + */ + reg = MODRM_REG(insn); + if (reg == 0) { + /* + * The register operand (if any) is either the A register + * (%rax, %eax, etc.) or (if the 0x4 bit is set in the + * REX prefix) %r8. In any case, we know the C register + * is NOT the register operand, so we use %rcx (register + * #1) for the scratch register. + */ + auprobe->fixups = UPROBE_FIX_RIP_CX; + /* Change modrm from 00 000 101 to 00 000 001. */ + *cursor = 0x1; + } else { + /* Use %rax (register #0) for the scratch register. */ + auprobe->fixups = UPROBE_FIX_RIP_AX; + /* Change modrm from 00 xxx 101 to 00 xxx 000 */ + *cursor = (reg << 3); + } + + /* Target address = address of next instruction + (signed) offset */ + auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; + + /* Displacement field is gone; slide immediate field (if any) over. */ + if (insn->immediate.nbytes) { + cursor++; + memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); + } + return; +} + +static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) +{ + insn_init(insn, auprobe->insn, true); + + /* Skip good instruction prefixes; reject "bad" ones. */ + insn_get_opcode(insn); + if (is_prefix_bad(insn)) + return -ENOTSUPP; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) + return 0; + + if (insn->opcode.nbytes == 2) { + if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) + return 0; + } + return -ENOTSUPP; +} + +static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +{ + if (mm->context.ia32_compat) + return validate_insn_32bits(auprobe, insn); + return validate_insn_64bits(auprobe, insn); +} +#else /* 32-bit: */ +static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +{ + /* No RIP-relative addressing on 32-bit */ +} + +static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +{ + return validate_insn_32bits(auprobe, insn); +} +#endif /* CONFIG_X86_64 */ + +/** + * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. + * @mm: the probed address space. + * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint + * Return 0 on success or a -ve number on error. + */ +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) +{ + int ret; + struct insn insn; + + auprobe->fixups = 0; + ret = validate_insn_bits(auprobe, mm, &insn); + if (ret != 0) + return ret; + + handle_riprel_insn(auprobe, mm, &insn); + prepare_fixups(auprobe, &insn); + + return 0; +} + +#ifdef CONFIG_X86_64 +/* + * If we're emulating a rip-relative instruction, save the contents + * of the scratch register and store the target address in that register. + */ +static void +pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, + struct arch_uprobe_task *autask) +{ + if (auprobe->fixups & UPROBE_FIX_RIP_AX) { + autask->saved_scratch_register = regs->ax; + regs->ax = current->utask->vaddr; + regs->ax += auprobe->rip_rela_target_address; + } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { + autask->saved_scratch_register = regs->cx; + regs->cx = current->utask->vaddr; + regs->cx += auprobe->rip_rela_target_address; + } +} +#else +static void +pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, + struct arch_uprobe_task *autask) +{ + /* No RIP-relative addressing on 32-bit */ +} +#endif + +/* + * arch_uprobe_pre_xol - prepare to execute out of line. + * @auprobe: the probepoint information. + * @regs: reflects the saved user state of current task. + */ +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct arch_uprobe_task *autask; + + autask = ¤t->utask->autask; + autask->saved_trap_nr = current->thread.trap_nr; + current->thread.trap_nr = UPROBE_TRAP_NR; + regs->ip = current->utask->xol_vaddr; + pre_xol_rip_insn(auprobe, regs, autask); + + return 0; +} + +/* + * This function is called by arch_uprobe_post_xol() to adjust the return + * address pushed by a call instruction executed out of line. + */ +static int adjust_ret_addr(unsigned long sp, long correction) +{ + int rasize, ncopied; + long ra = 0; + + if (is_ia32_task()) + rasize = 4; + else + rasize = 8; + + ncopied = copy_from_user(&ra, (void __user *)sp, rasize); + if (unlikely(ncopied)) + return -EFAULT; + + ra += correction; + ncopied = copy_to_user((void __user *)sp, &ra, rasize); + if (unlikely(ncopied)) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_X86_64 +static bool is_riprel_insn(struct arch_uprobe *auprobe) +{ + return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0); +} + +static void +handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +{ + if (is_riprel_insn(auprobe)) { + struct arch_uprobe_task *autask; + + autask = ¤t->utask->autask; + if (auprobe->fixups & UPROBE_FIX_RIP_AX) + regs->ax = autask->saved_scratch_register; + else + regs->cx = autask->saved_scratch_register; + + /* + * The original instruction includes a displacement, and so + * is 4 bytes longer than what we've just single-stepped. + * Fall through to handle stuff like "jmpq *...(%rip)" and + * "callq *...(%rip)". + */ + if (correction) + *correction += 4; + } +} +#else +static void +handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +{ + /* No RIP-relative addressing on 32-bit */ +} +#endif + +/* + * If xol insn itself traps and generates a signal(Say, + * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped + * instruction jumps back to its own address. It is assumed that anything + * like do_page_fault/do_trap/etc sets thread.trap_nr != -1. + * + * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, + * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to + * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol(). + */ +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + if (t->thread.trap_nr != UPROBE_TRAP_NR) + return true; + + return false; +} + +/* + * Called after single-stepping. To avoid the SMP problems that can + * occur when we temporarily put back the original opcode to + * single-step, we single-stepped a copy of the instruction. + * + * This function prepares to resume execution after the single-step. + * We have to fix things up as follows: + * + * Typically, the new ip is relative to the copied instruction. We need + * to make it relative to the original instruction (FIX_IP). Exceptions + * are return instructions and absolute or indirect jump or call instructions. + * + * If the single-stepped instruction was a call, the return address that + * is atop the stack is the address following the copied instruction. We + * need to make it the address following the original instruction (FIX_CALL). + * + * If the original instruction was a rip-relative instruction such as + * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent + * instruction using a scratch register -- e.g., "movl %edx,(%rax)". + * We need to restore the contents of the scratch register and adjust + * the ip, keeping in mind that the instruction we executed is 4 bytes + * shorter than the original instruction (since we squeezed out the offset + * field). (FIX_RIP_AX or FIX_RIP_CX) + */ +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask; + long correction; + int result = 0; + + WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + + utask = current->utask; + current->thread.trap_nr = utask->autask.saved_trap_nr; + correction = (long)(utask->vaddr - utask->xol_vaddr); + handle_riprel_post_xol(auprobe, regs, &correction); + if (auprobe->fixups & UPROBE_FIX_IP) + regs->ip += correction; + + if (auprobe->fixups & UPROBE_FIX_CALL) + result = adjust_ret_addr(regs->sp, correction); + + return result; +} + +/* callback routine for handling exceptions. */ +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + int ret = NOTIFY_DONE; + + /* We are only interested in userspace traps */ + if (regs && !user_mode_vm(regs)) + return NOTIFY_DONE; + + switch (val) { + case DIE_INT3: + if (uprobe_pre_sstep_notifier(regs)) + ret = NOTIFY_STOP; + + break; + + case DIE_DEBUG: + if (uprobe_post_sstep_notifier(regs)) + ret = NOTIFY_STOP; + + default: + break; + } + + return ret; +} + +/* + * This function gets called when XOL instruction either gets trapped or + * the thread has a fatal signal, so reset the instruction pointer to its + * probed address. + */ +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.trap_nr = utask->autask.saved_trap_nr; + handle_riprel_post_xol(auprobe, regs, NULL); + instruction_pointer_set(regs, utask->vaddr); +} + +/* + * Skip these instructions as per the currently known x86 ISA. + * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } + */ +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + int i; + + for (i = 0; i < MAX_UINSN_BYTES; i++) { + if ((auprobe->insn[i] == 0x66)) + continue; + + if (auprobe->insn[i] == 0x90) + return true; + + if (i == (MAX_UINSN_BYTES - 1)) + break; + + if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f)) + return true; + + if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19)) + return true; + + if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0)) + return true; + + break; + } + return false; +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 34cc627f18f..97559a67400 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -207,6 +207,9 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +#define CPUID_THERM_POWER_LEAF 6 +#define APERFMPERF_PRESENT 0 + static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; @@ -240,6 +243,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *dx = cpuid_leaf5_edx_val; return; + case CPUID_THERM_POWER_LEAF: + /* Disabling APERFMPERF for kernel usage */ + maskecx = ~(1 << APERFMPERF_PRESENT); + break; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; diff --git a/config-generic b/config-generic index 35146b86a25..2429affc6d1 100644 --- a/config-generic +++ b/config-generic @@ -2446,7 +2446,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4062,6 +4062,7 @@ CONFIG_STACK_TRACER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/configs/kernel-3.4.3-x86_64-debug.config b/configs/kernel-3.4.4-x86_64-debug.config index 40034e222ed..748421d537e 100644 --- a/configs/kernel-3.4.3-x86_64-debug.config +++ b/configs/kernel-3.4.4-x86_64-debug.config @@ -1,7 +1,7 @@ # x86_64 # # Automatically generated file; DO NOT EDIT. -# Linux/x86_64 3.4.3 Kernel Configuration +# Linux/x86_64 3.4.4 Kernel Configuration # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -50,6 +50,7 @@ CONFIG_X86_HT=y CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" # CONFIG_KTIME_SCALAR is not set CONFIG_ARCH_CPU_PROBE_RELEASE=y +CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_HAVE_IRQ_WORK=y CONFIG_IRQ_WORK=y @@ -198,6 +199,7 @@ CONFIG_OPROFILE_NMI_TIMER=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y +CONFIG_UPROBES=y CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_KRETPROBES=y CONFIG_USER_RETURN_NOTIFIER=y @@ -3528,7 +3530,6 @@ CONFIG_VGA_ARB=y CONFIG_VGA_ARB_MAX_GPUS=16 CONFIG_VGA_SWITCHEROO=y CONFIG_DRM=m -CONFIG_DRM_USB=m CONFIG_DRM_KMS_HELPER=m # CONFIG_DRM_LOAD_EDID_FIRMWARE is not set CONFIG_DRM_TTM=m @@ -3557,7 +3558,7 @@ CONFIG_DRM_VMWGFX=m CONFIG_DRM_GMA500=m # CONFIG_DRM_GMA600 is not set CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set # CONFIG_STUB_POULSBO is not set CONFIG_VGASTATE=m CONFIG_VIDEO_OUTPUT_CONTROL=m @@ -4891,6 +4892,8 @@ CONFIG_BRANCH_PROFILE_NONE=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y +CONFIG_PROBE_EVENTS=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FUNCTION_PROFILER=y CONFIG_FTRACE_MCOUNT_RECORD=y diff --git a/configs/kernel-3.4.3-x86_64.config b/configs/kernel-3.4.4-x86_64.config index b505fbe81e6..782006d9112 100644 --- a/configs/kernel-3.4.3-x86_64.config +++ b/configs/kernel-3.4.4-x86_64.config @@ -1,7 +1,7 @@ # x86_64 # # Automatically generated file; DO NOT EDIT. -# Linux/x86_64 3.4.3 Kernel Configuration +# Linux/x86_64 3.4.4 Kernel Configuration # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -50,6 +50,7 @@ CONFIG_X86_HT=y CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" # CONFIG_KTIME_SCALAR is not set CONFIG_ARCH_CPU_PROBE_RELEASE=y +CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_HAVE_IRQ_WORK=y CONFIG_IRQ_WORK=y @@ -197,6 +198,7 @@ CONFIG_OPROFILE_NMI_TIMER=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y +CONFIG_UPROBES=y CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_KRETPROBES=y CONFIG_USER_RETURN_NOTIFIER=y @@ -3524,7 +3526,6 @@ CONFIG_VGA_ARB=y CONFIG_VGA_ARB_MAX_GPUS=16 CONFIG_VGA_SWITCHEROO=y CONFIG_DRM=m -CONFIG_DRM_USB=m CONFIG_DRM_KMS_HELPER=m # CONFIG_DRM_LOAD_EDID_FIRMWARE is not set CONFIG_DRM_TTM=m @@ -3553,7 +3554,7 @@ CONFIG_DRM_VMWGFX=m CONFIG_DRM_GMA500=m # CONFIG_DRM_GMA600 is not set CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set # CONFIG_STUB_POULSBO is not set CONFIG_VGASTATE=m CONFIG_VIDEO_OUTPUT_CONTROL=m @@ -4861,6 +4862,8 @@ CONFIG_BRANCH_PROFILE_NONE=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y +CONFIG_PROBE_EVENTS=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FUNCTION_PROFILER=y CONFIG_FTRACE_MCOUNT_RECORD=y diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index 6289f0eee24..731c9046cf7 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -34,7 +34,7 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max, u32 *data = buf; /* data ready? */ - if (readl(trng->base + TRNG_ODATA) & 1) { + if (readl(trng->base + TRNG_ISR) & 1) { *data = readl(trng->base + TRNG_ODATA); /* ensure data ready is only set again AFTER the next data diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 85226ccf529..0fe2277d867 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1932,12 +1932,6 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->bank != 8) return NOTIFY_DONE; -#ifdef CONFIG_SMP - /* Only handle if it is the right mc controller */ - if (mce->socketid != pvt->i7core_dev->socket) - return NOTIFY_DONE; -#endif - smp_rmb(); if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { smp_wmb(); @@ -2234,8 +2228,6 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) if (pvt->enable_scrub) disable_sdram_scrub_setting(mci); - mce_unregister_decode_chain(&i7_mce_dec); - /* Disable EDAC polling */ i7core_pci_ctl_release(pvt); @@ -2336,8 +2328,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* DCLK for scrub rate setting */ pvt->dclk_freq = get_dclk_freq(); - mce_register_decode_chain(&i7_mce_dec); - return 0; fail0: @@ -2481,8 +2471,10 @@ static int __init i7core_init(void) pci_rc = pci_register_driver(&i7core_driver); - if (pci_rc >= 0) + if (pci_rc >= 0) { + mce_register_decode_chain(&i7_mce_dec); return 0; + } i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", pci_rc); @@ -2498,6 +2490,7 @@ static void __exit i7core_exit(void) { debugf2("MC: " __FILE__ ": %s()\n", __func__); pci_unregister_driver(&i7core_driver); + mce_unregister_decode_chain(&i7_mce_dec); } module_init(i7core_init); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a203536d90d..0f9552d6b56 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -599,7 +599,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pvt->is_close_pg = false; } - pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, ®); + pci_read_config_dword(pvt->pci_ddrio, RANK_CFG_A, ®); if (IS_RDIMM_ENABLED(reg)) { /* FIXME: Can also be LRDIMM */ debugf0("Memory is registered\n"); @@ -1669,8 +1669,6 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", __func__, mci, &sbridge_dev->pdev[0]->dev); - mce_unregister_decode_chain(&sbridge_mce_dec); - /* Remove MC sysfs nodes */ edac_mc_del_mc(mci->dev); @@ -1738,7 +1736,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) goto fail0; } - mce_register_decode_chain(&sbridge_mce_dec); return 0; fail0: @@ -1867,8 +1864,10 @@ static int __init sbridge_init(void) pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) + if (pci_rc >= 0) { + mce_register_decode_chain(&sbridge_mce_dec); return 0; + } sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", pci_rc); @@ -1884,6 +1883,7 @@ static void __exit sbridge_exit(void) { debugf2("MC: " __FILE__ ": %s()\n", __func__); pci_unregister_driver(&sbridge_driver); + mce_unregister_decode_chain(&sbridge_mce_dec); } module_init(sbridge_init); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index a053b322043..7daef381443 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -368,7 +368,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, int recv_bytes; uint32_t status; uint32_t aux_clock_divider; - int try, precharge = 5; + int try, precharge; intel_dp_check_edp(intel_dp); /* The clock divider is based off the hrawclk, @@ -388,6 +388,11 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, else aux_clock_divider = intel_hrawclk(dev) / 2; + if (IS_GEN6(dev)) + precharge = 3; + else + precharge = 5; + /* Try to wait for any previous AUX channel activity */ for (try = 0; try < 3; try++) { status = I915_READ(ch_ctl); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 51e8d086767..e72c03ff595 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2210,6 +2210,9 @@ static void evergreen_gpu_init(struct radeon_device *rdev) smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.evergreen.sx_num_of_sets); WREG32(SMX_DC_CTL0, smx_dc_ctl0); + if (rdev->family <= CHIP_SUMO2) + WREG32(SMX_SAR_CTL0, 0x00010000); + WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_size / 4) - 1) | POSITION_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_pos_size / 4) - 1) | SMX_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_smx_size / 4) - 1))); diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index a5b88aad834..f62ccd3555d 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -273,6 +273,7 @@ #define SCRATCH_UMSK 0x8540 #define SCRATCH_ADDR 0x8544 +#define SMX_SAR_CTL0 0xA008 #define SMX_DC_CTL0 0xA020 #define USE_HASH_FUNCTION (1 << 0) #define NUMBER_OF_SETS(x) ((x) << 1) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 0e7395b77c2..22a1a1cadb7 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1914,6 +1914,7 @@ void r600_gpu_init(struct radeon_device *rdev) WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3))); WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095)); + WREG32(VC_ENHANCE, 0); } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 59f9c993cc3..12ceb829a03 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -483,6 +483,7 @@ #define TC_L2_SIZE(x) ((x)<<5) #define L2_DISABLE_LATE_HIT (1<<9) +#define VC_ENHANCE 0x9714 #define VGT_CACHE_INVALIDATION 0x88C4 #define CACHE_INVALIDATION(x) ((x)<<0) diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 80a292bc457..591040b9466 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -782,6 +782,9 @@ static void rv770_gpu_init(struct radeon_device *rdev) ACK_FLUSH_CTL(3) | SYNC_FLUSH_CTL)); + if (rdev->family != CHIP_RV770) + WREG32(SMX_SAR_CTL0, 0x00003f3f); + db_debug3 = RREG32(DB_DEBUG3); db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f); switch (rdev->family) { @@ -960,7 +963,7 @@ static void rv770_gpu_init(struct radeon_device *rdev) WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3))); - + WREG32(VC_ENHANCE, 0); } void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index 75380927e9c..7095a713ad8 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -208,6 +208,7 @@ #define SCRATCH_UMSK 0x8540 #define SCRATCH_ADDR 0x8544 +#define SMX_SAR_CTL0 0xA008 #define SMX_DC_CTL0 0xA020 #define USE_HASH_FUNCTION (1 << 0) #define CACHE_DEPTH(x) ((x) << 1) @@ -307,6 +308,8 @@ #define TCP_CNTL 0x9610 #define TCP_CHAN_STEER 0x9614 +#define VC_ENHANCE 0x9714 + #define VGT_CACHE_INVALIDATION 0x88C4 #define CACHE_INVALIDATION(x) ((x)<<0) #define VC_ONLY 0 diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 30d98d14b5c..dd14cd1a003 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -47,9 +47,9 @@ static int sis_driver_load(struct drm_device *dev, unsigned long chipset) if (dev_priv == NULL) return -ENOMEM; + idr_init(&dev_priv->object_idr); dev->dev_private = (void *)dev_priv; dev_priv->chipset = chipset; - idr_init(&dev->object_name_idr); return 0; } diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c index 1f182254e81..c126182ac07 100644 --- a/drivers/gpu/drm/via/via_map.c +++ b/drivers/gpu/drm/via/via_map.c @@ -100,12 +100,11 @@ int via_driver_load(struct drm_device *dev, unsigned long chipset) if (dev_priv == NULL) return -ENOMEM; + idr_init(&dev_priv->object_idr); dev->dev_private = (void *)dev_priv; dev_priv->chipset = chipset; - idr_init(&dev->object_name_idr); - pci_set_master(dev->pdev); ret = drm_vblank_init(dev, 1); diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index b3fdc6977f2..c6d95f2031e 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2061,8 +2061,9 @@ const struct e1000_info e1000_82574_info = { | FLAG_HAS_SMART_POWER_DOWN | FLAG_HAS_AMT | FLAG_HAS_CTRLEXT_ON_LOAD, - .flags2 = FLAG2_CHECK_PHY_HANG + .flags2 = FLAG2_CHECK_PHY_HANG | FLAG2_DISABLE_ASPM_L0S + | FLAG2_DISABLE_ASPM_L1 | FLAG2_NO_DISABLE_RX, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 34fd3aba5bc..8ba6b57e3b6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5293,14 +5293,6 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - /* 82573 Errata 17 */ - if (((adapter->hw.mac.type == e1000_82573) || - (adapter->hw.mac.type == e1000_82574)) && - (max_frame > ETH_FRAME_LEN + ETH_FCS_LEN)) { - adapter->flags2 |= FLAG2_DISABLE_ASPM_L1; - e1000e_disable_aspm(adapter->pdev, PCIE_LINK_STATE_L1); - } - while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) usleep_range(1000, 2000); /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c index 9f71b85f591..c0cfa4e652c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-6000.c +++ b/drivers/net/wireless/iwlwifi/iwl-6000.c @@ -49,17 +49,20 @@ #define IWL6000_UCODE_API_MAX 6 #define IWL6050_UCODE_API_MAX 5 #define IWL6000G2_UCODE_API_MAX 6 +#define IWL6035_UCODE_API_MAX 6 /* Oldest version we won't warn about */ #define IWL6000_UCODE_API_OK 4 #define IWL6000G2_UCODE_API_OK 5 #define IWL6050_UCODE_API_OK 5 #define IWL6000G2B_UCODE_API_OK 6 +#define IWL6035_UCODE_API_OK 6 /* Lowest firmware API version supported */ #define IWL6000_UCODE_API_MIN 4 #define IWL6050_UCODE_API_MIN 4 -#define IWL6000G2_UCODE_API_MIN 4 +#define IWL6000G2_UCODE_API_MIN 5 +#define IWL6035_UCODE_API_MIN 6 #define IWL6000_FW_PRE "iwlwifi-6000-" #define IWL6000_MODULE_FIRMWARE(api) IWL6000_FW_PRE __stringify(api) ".ucode" @@ -425,9 +428,25 @@ const struct iwl_cfg iwl6030_2bg_cfg = { IWL_DEVICE_6030, }; +#define IWL_DEVICE_6035 \ + .fw_name_pre = IWL6030_FW_PRE, \ + .ucode_api_max = IWL6035_UCODE_API_MAX, \ + .ucode_api_ok = IWL6035_UCODE_API_OK, \ + .ucode_api_min = IWL6035_UCODE_API_MIN, \ + .max_inst_size = IWL60_RTC_INST_SIZE, \ + .max_data_size = IWL60_RTC_DATA_SIZE, \ + .eeprom_ver = EEPROM_6030_EEPROM_VERSION, \ + .eeprom_calib_ver = EEPROM_6030_TX_POWER_VERSION, \ + .lib = &iwl6030_lib, \ + .base_params = &iwl6000_g2_base_params, \ + .bt_params = &iwl6000_bt_params, \ + .need_temp_offset_calib = true, \ + .led_mode = IWL_LED_RF_STATE, \ + .adv_pm = true + const struct iwl_cfg iwl6035_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6235 AGN", - IWL_DEVICE_6030, + IWL_DEVICE_6035, .ht_params = &iwl6000_ht_params, }; diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-eeprom.c index 23cea42b949..79dddc481da 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.c @@ -513,28 +513,28 @@ static int iwl_find_otp_image(struct iwl_trans *trans, * iwl_get_max_txpower_avg - get the highest tx power from all chains. * find the highest tx power from all chains for the channel */ -static s8 iwl_get_max_txpower_avg(const struct iwl_cfg *cfg, +static s8 iwl_get_max_txpower_avg(struct iwl_priv *priv, struct iwl_eeprom_enhanced_txpwr *enhanced_txpower, int element, s8 *max_txpower_in_half_dbm) { s8 max_txpower_avg = 0; /* (dBm) */ /* Take the highest tx power from any valid chains */ - if ((cfg->valid_tx_ant & ANT_A) && + if ((hw_params(priv).valid_tx_ant & ANT_A) && (enhanced_txpower[element].chain_a_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].chain_a_max; - if ((cfg->valid_tx_ant & ANT_B) && + if ((hw_params(priv).valid_tx_ant & ANT_B) && (enhanced_txpower[element].chain_b_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].chain_b_max; - if ((cfg->valid_tx_ant & ANT_C) && + if ((hw_params(priv).valid_tx_ant & ANT_C) && (enhanced_txpower[element].chain_c_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].chain_c_max; - if (((cfg->valid_tx_ant == ANT_AB) | - (cfg->valid_tx_ant == ANT_BC) | - (cfg->valid_tx_ant == ANT_AC)) && + if (((hw_params(priv).valid_tx_ant == ANT_AB) | + (hw_params(priv).valid_tx_ant == ANT_BC) | + (hw_params(priv).valid_tx_ant == ANT_AC)) && (enhanced_txpower[element].mimo2_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].mimo2_max; - if ((cfg->valid_tx_ant == ANT_ABC) && + if ((hw_params(priv).valid_tx_ant == ANT_ABC) && (enhanced_txpower[element].mimo3_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].mimo3_max; @@ -637,7 +637,7 @@ static void iwl_eeprom_enhanced_txpower(struct iwl_priv *priv) ((txp->delta_20_in_40 & 0xf0) >> 4), (txp->delta_20_in_40 & 0x0f)); - max_txp_avg = iwl_get_max_txpower_avg(cfg(priv), txp_array, idx, + max_txp_avg = iwl_get_max_txpower_avg(priv, txp_array, idx, &max_txp_avg_halfdbm); /* diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h index 1c2fe87bd7e..3b844b79b14 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h +++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h @@ -342,7 +342,7 @@ void iwl_trans_pcie_tx_agg_setup(struct iwl_trans *trans, enum iwl_rxon_context_id ctx, int sta_id, int tid, int frame_limit, u16 ssn); void iwlagn_txq_free_tfd(struct iwl_trans *trans, struct iwl_tx_queue *txq, - int index, enum dma_data_direction dma_dir); + enum dma_data_direction dma_dir); int iwl_tx_queue_reclaim(struct iwl_trans *trans, int txq_id, int index, struct sk_buff_head *skbs); int iwl_queue_space(const struct iwl_queue *q); diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c index e92972fd6ec..d7964b12ef1 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c @@ -237,32 +237,38 @@ static void iwlagn_unmap_tfd(struct iwl_trans *trans, struct iwl_cmd_meta *meta, for (i = 1; i < num_tbs; i++) dma_unmap_single(trans->dev, iwl_tfd_tb_get_addr(tfd, i), iwl_tfd_tb_get_len(tfd, i), dma_dir); + + tfd->num_tbs = 0; } /** * iwlagn_txq_free_tfd - Free all chunks referenced by TFD [txq->q.read_ptr] * @trans - transport private data * @txq - tx queue - * @index - the index of the TFD to be freed - *@dma_dir - the direction of the DMA mapping + * @dma_dir - the direction of the DMA mapping * * Does NOT advance any TFD circular buffer read/write indexes * Does NOT free the TFD itself (which is within circular buffer) */ void iwlagn_txq_free_tfd(struct iwl_trans *trans, struct iwl_tx_queue *txq, - int index, enum dma_data_direction dma_dir) + enum dma_data_direction dma_dir) { struct iwl_tfd *tfd_tmp = txq->tfds; + /* rd_ptr is bounded by n_bd and idx is bounded by n_window */ + int rd_ptr = txq->q.read_ptr; + int idx = get_cmd_index(&txq->q, rd_ptr); + lockdep_assert_held(&txq->lock); - iwlagn_unmap_tfd(trans, &txq->meta[index], &tfd_tmp[index], dma_dir); + /* We have only q->n_window txq->entries, but we use q->n_bd tfds */ + iwlagn_unmap_tfd(trans, &txq->meta[idx], &tfd_tmp[rd_ptr], dma_dir); /* free SKB */ if (txq->skbs) { struct sk_buff *skb; - skb = txq->skbs[index]; + skb = txq->skbs[idx]; /* Can be called from irqs-disabled context * If skb is not NULL, it means that the whole queue is being @@ -270,7 +276,7 @@ void iwlagn_txq_free_tfd(struct iwl_trans *trans, struct iwl_tx_queue *txq, */ if (skb) { iwl_op_mode_free_skb(trans->op_mode, skb); - txq->skbs[index] = NULL; + txq->skbs[idx] = NULL; } } } @@ -1100,7 +1106,7 @@ int iwl_tx_queue_reclaim(struct iwl_trans *trans, int txq_id, int index, iwlagn_txq_inval_byte_cnt_tbl(trans, txq); - iwlagn_txq_free_tfd(trans, txq, txq->q.read_ptr, DMA_TO_DEVICE); + iwlagn_txq_free_tfd(trans, txq, DMA_TO_DEVICE); freed++; } return freed; diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c index 66df0166cfd..6eac984b2d0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c @@ -430,9 +430,7 @@ static void iwl_tx_queue_unmap(struct iwl_trans *trans, int txq_id) spin_lock_bh(&txq->lock); while (q->write_ptr != q->read_ptr) { - /* The read_ptr needs to bound by q->n_window */ - iwlagn_txq_free_tfd(trans, txq, get_cmd_index(q, q->read_ptr), - dma_dir); + iwlagn_txq_free_tfd(trans, txq, dma_dir); q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd); } spin_unlock_bh(&txq->lock); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 111569ccab4..f597a1aa5e6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1743,6 +1743,11 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; + /* Some devices mustn't be in D3 during system sleep */ + if (target_state == PCI_D3hot && + (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)) + return 0; + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8d0846e8886..d6244076e77 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2933,6 +2933,32 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); +/* + * The Intel 6 Series/C200 Series chipset's EHCI controllers on many + * ASUS motherboards will cause memory corruption or a system crash + * if they are in D3 while the system is put into S3 sleep. + */ +static void __devinit asus_ehci_no_d3(struct pci_dev *dev) +{ + const char *sys_info; + static const char good_Asus_board[] = "P8Z68-V"; + + if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP) + return; + if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK) + return; + sys_info = dmi_get_system_info(DMI_BOARD_NAME); + if (sys_info && memcmp(sys_info, good_Asus_board, + sizeof(good_Asus_board) - 1) == 0) + return; + + dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n"); + dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP; + device_set_wakeup_capable(&dev->dev, false); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git a/drivers/remoteproc/omap_remoteproc.c b/drivers/remoteproc/omap_remoteproc.c index 69425c4e86f..de138e30d3e 100644 --- a/drivers/remoteproc/omap_remoteproc.c +++ b/drivers/remoteproc/omap_remoteproc.c @@ -182,7 +182,7 @@ static int __devinit omap_rproc_probe(struct platform_device *pdev) ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { - dev_err(pdev->dev.parent, "dma_set_coherent_mask: %d\n", ret); + dev_err(&pdev->dev, "dma_set_coherent_mask: %d\n", ret); return ret; } diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index e756a0df366..7591b9774d0 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -247,7 +247,7 @@ rproc_load_segments(struct rproc *rproc, const u8 *elf_data, size_t len) } if (offset + filesz > len) { - dev_err(dev, "truncated fw: need 0x%x avail 0x%x\n", + dev_err(dev, "truncated fw: need 0x%x avail 0x%zx\n", offset + filesz, len); ret = -EINVAL; break; @@ -934,7 +934,7 @@ static void rproc_resource_cleanup(struct rproc *rproc) unmapped = iommu_unmap(rproc->domain, entry->da, entry->len); if (unmapped != entry->len) { /* nothing much to do besides complaining */ - dev_err(dev, "failed to unmap %u/%u\n", entry->len, + dev_err(dev, "failed to unmap %u/%zu\n", entry->len, unmapped); } @@ -1020,7 +1020,7 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) ehdr = (struct elf32_hdr *)fw->data; - dev_info(dev, "Booting fw image %s, size %d\n", name, fw->size); + dev_info(dev, "Booting fw image %s, size %zd\n", name, fw->size); /* * if enabling an IOMMU isn't relevant for this rproc, this is @@ -1041,8 +1041,10 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) /* look for the resource table */ table = rproc_find_rsc_table(rproc, fw->data, fw->size, &tablesz); - if (!table) + if (!table) { + ret = -EINVAL; goto clean_up; + } /* handle fw resources which are required to boot rproc */ ret = rproc_handle_boot_rsc(rproc, table, tablesz); diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 18084788ee3..3f03342b303 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -1785,7 +1785,7 @@ static inline void _base_writeq(__u64 b, volatile void __iomem *addr, static inline u8 _base_get_msix_index(struct MPT2SAS_ADAPTER *ioc) { - return ioc->cpu_msix_table[smp_processor_id()]; + return ioc->cpu_msix_table[raw_smp_processor_id()]; } /** diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index c7746a3339d..f30e124874b 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -351,9 +351,11 @@ int target_emulate_set_target_port_groups(struct se_task *task) out: transport_kunmap_data_sg(cmd); - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); - return 0; + if (!rc) { + task->task_scsi_status = GOOD; + transport_complete_task(task, 1); + } + return rc; } static inline int core_alua_state_nonoptimized( diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index d3d91dae065..944eaeb8e0c 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -214,24 +214,24 @@ static int xen_hvm_console_init(void) /* already configured */ if (info->intf != NULL) return 0; - + /* + * If the toolstack (or the hypervisor) hasn't set these values, the + * default value is 0. Even though mfn = 0 and evtchn = 0 are + * theoretically correct values, in practice they never are and they + * mean that a legacy toolstack hasn't initialized the pv console correctly. + */ r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); - if (r < 0) { - kfree(info); - return -ENODEV; - } + if (r < 0 || v == 0) + goto err; info->evtchn = v; - hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); - if (r < 0) { - kfree(info); - return -ENODEV; - } + v = 0; + r = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); + if (r < 0 || v == 0) + goto err; mfn = v; info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); - if (info->intf == NULL) { - kfree(info); - return -ENODEV; - } + if (info->intf == NULL) + goto err; info->vtermno = HVC_COOKIE; spin_lock(&xencons_lock); @@ -239,6 +239,9 @@ static int xen_hvm_console_init(void) spin_unlock(&xencons_lock); return 0; +err: + kfree(info); + return -ENODEV; } static int xen_pv_console_init(void) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index b32ccb46101..640cf7983b0 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -567,6 +567,14 @@ static int acm_port_activate(struct tty_port *port, struct tty_struct *tty) usb_autopm_put_interface(acm->control); + /* + * Unthrottle device in case the TTY was closed while throttled. + */ + spin_lock_irq(&acm->read_lock); + acm->throttled = 0; + acm->throttle_req = 0; + spin_unlock_irq(&acm->read_lock); + if (acm_submit_read_urbs(acm, GFP_KERNEL)) goto error_submit_read_urbs; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 4c8321ea4c5..83d14bfc2b2 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -55,6 +55,15 @@ static const struct usb_device_id wdm_ids[] = { .bInterfaceSubClass = 1, .bInterfaceProtocol = 9, /* NOTE: CDC ECM control interface! */ }, + { + /* Vodafone/Huawei K5005 (12d1:14c8) and similar modems */ + .match_flags = USB_DEVICE_ID_MATCH_VENDOR | + USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = HUAWEI_VENDOR_ID, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 57, /* NOTE: CDC ECM control interface! */ + }, { } }; diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 57ed9e400c0..622b4a48e73 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -493,15 +493,6 @@ static int hcd_pci_suspend_noirq(struct device *dev) pci_save_state(pci_dev); - /* - * Some systems crash if an EHCI controller is in D3 during - * a sleep transition. We have to leave such controllers in D0. - */ - if (hcd->broken_pci_sleep) { - dev_dbg(dev, "Staying in PCI D0\n"); - return retval; - } - /* If the root hub is dead rather than suspended, disallow remote * wakeup. usb_hc_died() should ensure that both hosts are marked as * dying, so we only need to check the primary roothub. diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index ca717da3be9..ef116a55afa 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1803,7 +1803,6 @@ free_interfaces: intfc = cp->intf_cache[i]; intf->altsetting = intfc->altsetting; intf->num_altsetting = intfc->num_altsetting; - intf->intf_assoc = find_iad(dev, cp, i); kref_get(&intfc->ref); alt = usb_altnum_to_altsetting(intf, 0); @@ -1816,6 +1815,8 @@ free_interfaces: if (!alt) alt = &intf->altsetting[0]; + intf->intf_assoc = + find_iad(dev, cp, alt->desc.bInterfaceNumber); intf->cur_altsetting = alt; usb_enable_interface(dev, intf, true); intf->dev.parent = &dev->dev; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 4a3bc5b7a06..bb73df6597b 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -671,7 +671,9 @@ static int ehci_init(struct usb_hcd *hcd) hw = ehci->async->hw; hw->hw_next = QH_NEXT(ehci, ehci->async->qh_dma); hw->hw_info1 = cpu_to_hc32(ehci, QH_HEAD); +#if defined(CONFIG_PPC_PS3) hw->hw_info1 |= cpu_to_hc32(ehci, (1 << 7)); /* I = 1 */ +#endif hw->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT); hw->hw_qtd_next = EHCI_LIST_END(ehci); ehci->async->qh_state = QH_STATE_LINKED; diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index bc94d7bf072..123481793a4 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -144,14 +144,6 @@ static int ehci_pci_setup(struct usb_hcd *hcd) hcd->has_tt = 1; tdi_reset(ehci); } - if (pdev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK) { - /* EHCI #1 or #2 on 6 Series/C200 Series chipset */ - if (pdev->device == 0x1c26 || pdev->device == 0x1c2d) { - ehci_info(ehci, "broken D3 during system sleep on ASUS\n"); - hcd->broken_pci_sleep = 1; - device_set_wakeup_capable(&pdev->dev, false); - } - } break; case PCI_VENDOR_ID_TDI: if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) { diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 497ed7723e4..6b908249b01 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -793,10 +793,9 @@ static void xhci_free_tt_info(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, int slot_id) { - struct list_head *tt; struct list_head *tt_list_head; - struct list_head *tt_next; - struct xhci_tt_bw_info *tt_info; + struct xhci_tt_bw_info *tt_info, *next; + bool slot_found = false; /* If the device never made it past the Set Address stage, * it may not have the real_port set correctly. @@ -808,34 +807,16 @@ static void xhci_free_tt_info(struct xhci_hcd *xhci, } tt_list_head = &(xhci->rh_bw[virt_dev->real_port - 1].tts); - if (list_empty(tt_list_head)) - return; - - list_for_each(tt, tt_list_head) { - tt_info = list_entry(tt, struct xhci_tt_bw_info, tt_list); - if (tt_info->slot_id == slot_id) + list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { + /* Multi-TT hubs will have more than one entry */ + if (tt_info->slot_id == slot_id) { + slot_found = true; + list_del(&tt_info->tt_list); + kfree(tt_info); + } else if (slot_found) { break; + } } - /* Cautionary measure in case the hub was disconnected before we - * stored the TT information. - */ - if (tt_info->slot_id != slot_id) - return; - - tt_next = tt->next; - tt_info = list_entry(tt, struct xhci_tt_bw_info, - tt_list); - /* Multi-TT hubs will have more than one entry */ - do { - list_del(tt); - kfree(tt_info); - tt = tt_next; - if (list_empty(tt_list_head)) - break; - tt_next = tt->next; - tt_info = list_entry(tt, struct xhci_tt_bw_info, - tt_list); - } while (tt_info->slot_id == slot_id); } int xhci_alloc_tt_info(struct xhci_hcd *xhci, @@ -1791,17 +1772,9 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) { struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); struct dev_info *dev_info, *next; - struct list_head *tt_list_head; - struct list_head *tt; - struct list_head *endpoints; - struct list_head *ep, *q; - struct xhci_tt_bw_info *tt_info; - struct xhci_interval_bw_table *bwt; - struct xhci_virt_ep *virt_ep; - unsigned long flags; int size; - int i; + int i, j, num_ports; /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); @@ -1858,21 +1831,22 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) } spin_unlock_irqrestore(&xhci->lock, flags); - bwt = &xhci->rh_bw->bw_table; - for (i = 0; i < XHCI_MAX_INTERVAL; i++) { - endpoints = &bwt->interval_bw[i].endpoints; - list_for_each_safe(ep, q, endpoints) { - virt_ep = list_entry(ep, struct xhci_virt_ep, bw_endpoint_list); - list_del(&virt_ep->bw_endpoint_list); - kfree(virt_ep); + num_ports = HCS_MAX_PORTS(xhci->hcs_params1); + for (i = 0; i < num_ports; i++) { + struct xhci_interval_bw_table *bwt = &xhci->rh_bw[i].bw_table; + for (j = 0; j < XHCI_MAX_INTERVAL; j++) { + struct list_head *ep = &bwt->interval_bw[j].endpoints; + while (!list_empty(ep)) + list_del_init(ep->next); } } - tt_list_head = &xhci->rh_bw->tts; - list_for_each_safe(tt, q, tt_list_head) { - tt_info = list_entry(tt, struct xhci_tt_bw_info, tt_list); - list_del(tt); - kfree(tt_info); + for (i = 0; i < num_ports; i++) { + struct xhci_tt_bw_info *tt, *n; + list_for_each_entry_safe(tt, n, &xhci->rh_bw[i].tts, tt_list) { + list_del(&tt->tt_list); + kfree(tt); + } } xhci->num_usb2_ports = 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 5910048b0a2..9248d496cb8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -795,8 +795,8 @@ int xhci_suspend(struct xhci_hcd *xhci) command = xhci_readl(xhci, &xhci->op_regs->command); command |= CMD_CSS; xhci_writel(xhci, command, &xhci->op_regs->command); - if (handshake(xhci, &xhci->op_regs->status, STS_SAVE, 0, 10*100)) { - xhci_warn(xhci, "WARN: xHC CMD_CSS timeout\n"); + if (handshake(xhci, &xhci->op_regs->status, STS_SAVE, 0, 10 * 1000)) { + xhci_warn(xhci, "WARN: xHC save state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } @@ -848,8 +848,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) command |= CMD_CRS; xhci_writel(xhci, command, &xhci->op_regs->command); if (handshake(xhci, &xhci->op_regs->status, - STS_RESTORE, 0, 10*100)) { - xhci_dbg(xhci, "WARN: xHC CMD_CSS timeout\n"); + STS_RESTORE, 0, 10 * 1000)) { + xhci_warn(xhci, "WARN: xHC restore state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index 768b4b55c81..9d63ba4d10d 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -34,6 +34,7 @@ #include <linux/dma-mapping.h> #include <mach/cputype.h> +#include <mach/hardware.h> #include <asm/mach-types.h> diff --git a/drivers/usb/musb/davinci.h b/drivers/usb/musb/davinci.h index 046c84433ca..371baa0ee50 100644 --- a/drivers/usb/musb/davinci.h +++ b/drivers/usb/musb/davinci.h @@ -15,7 +15,7 @@ */ /* Integrated highspeed/otg PHY */ -#define USBPHY_CTL_PADDR (DAVINCI_SYSTEM_MODULE_BASE + 0x34) +#define USBPHY_CTL_PADDR 0x01c40034 #define USBPHY_DATAPOL BIT(11) /* (dm355) switch D+/D- */ #define USBPHY_PHYCLKGD BIT(8) #define USBPHY_SESNDEN BIT(7) /* v(sess_end) comparator */ @@ -27,7 +27,7 @@ #define USBPHY_OTGPDWN BIT(1) #define USBPHY_PHYPDWN BIT(0) -#define DM355_DEEPSLEEP_PADDR (DAVINCI_SYSTEM_MODULE_BASE + 0x48) +#define DM355_DEEPSLEEP_PADDR 0x01c40048 #define DRVVBUS_FORCE BIT(2) #define DRVVBUS_OVERRIDE BIT(1) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index f42c29b11f7..95918dacc99 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1232,6 +1232,7 @@ static int musb_gadget_disable(struct usb_ep *ep) } musb_ep->desc = NULL; + musb_ep->end_point.desc = NULL; /* abort all pending DMA and requests */ nuke(musb_ep, -ESHUTDOWN); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index ec30f95ef39..95de9c08da1 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -82,6 +82,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ + { USB_DEVICE(0x10C4, 0x80C4) }, /* Cygnal Integrated Products, Inc., Optris infrared thermometer */ { USB_DEVICE(0x10C4, 0x80CA) }, /* Degree Controls Inc */ { USB_DEVICE(0x10C4, 0x80DD) }, /* Tracient RFID */ { USB_DEVICE(0x10C4, 0x80F6) }, /* Suunto sports instrument */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 95bba992416..53e3e2c5b33 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -737,6 +737,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_SERIAL_VX7_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_CT29B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_RTS01_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) }, { USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index f3c7c78ede3..5661c7e2d41 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -784,6 +784,7 @@ #define RTSYSTEMS_VID 0x2100 /* Vendor ID */ #define RTSYSTEMS_SERIAL_VX7_PID 0x9e52 /* Serial converter for VX-7 Radios using FT232RL */ #define RTSYSTEMS_CT29B_PID 0x9e54 /* CT29B Radio Cable */ +#define RTSYSTEMS_RTS01_PID 0x9e57 /* USB-RTS01 Radio Cable */ /* diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index 6edd26130e2..ef4d7adfbd9 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -317,13 +317,16 @@ static int mct_u232_set_modem_ctrl(struct usb_serial *serial, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_MODEM_CTRL_SIZE, WDR_TIMEOUT); - if (rc < 0) - dev_err(&serial->dev->dev, - "Set MODEM CTRL 0x%x failed (error = %d)\n", mcr, rc); + kfree(buf); + dbg("set_modem_ctrl: state=0x%x ==> mcr=0x%x", control_state, mcr); - kfree(buf); - return rc; + if (rc < 0) { + dev_err(&serial->dev->dev, + "Set MODEM CTRL 0x%x failed (error = %d)\n", mcr, rc); + return rc; + } + return 0; } /* mct_u232_set_modem_ctrl */ static int mct_u232_get_modem_stat(struct usb_serial *serial, diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index c526550694a..62739ff58c7 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -206,7 +206,7 @@ static const struct usb_device_id moschip_port_id_table[] = { {} /* terminating entry */ }; -static const struct usb_device_id moschip_id_table_combined[] __devinitconst = { +static const struct usb_device_id moschip_id_table_combined[] = { {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)}, {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)}, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f4465ccddc3..2706d8acb94 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -47,6 +47,7 @@ /* Function prototypes */ static int option_probe(struct usb_serial *serial, const struct usb_device_id *id); +static void option_release(struct usb_serial *serial); static int option_send_setup(struct usb_serial_port *port); static void option_instat_callback(struct urb *urb); @@ -150,6 +151,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_PRODUCT_E14AC 0x14AC #define HUAWEI_PRODUCT_K3806 0x14AE #define HUAWEI_PRODUCT_K4605 0x14C6 +#define HUAWEI_PRODUCT_K5005 0x14C8 #define HUAWEI_PRODUCT_K3770 0x14C9 #define HUAWEI_PRODUCT_K3771 0x14CA #define HUAWEI_PRODUCT_K4510 0x14CB @@ -425,7 +427,7 @@ static void option_instat_callback(struct urb *urb); #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 -/* YUGA products www.yuga-info.com*/ +/* YUGA products www.yuga-info.com gavin.kx@qq.com */ #define YUGA_VENDOR_ID 0x257A #define YUGA_PRODUCT_CEM600 0x1601 #define YUGA_PRODUCT_CEM610 0x1602 @@ -442,6 +444,8 @@ static void option_instat_callback(struct urb *urb); #define YUGA_PRODUCT_CEU516 0x160C #define YUGA_PRODUCT_CEU528 0x160D #define YUGA_PRODUCT_CEU526 0x160F +#define YUGA_PRODUCT_CEU881 0x161F +#define YUGA_PRODUCT_CEU882 0x162F #define YUGA_PRODUCT_CWM600 0x2601 #define YUGA_PRODUCT_CWM610 0x2602 @@ -457,23 +461,26 @@ static void option_instat_callback(struct urb *urb); #define YUGA_PRODUCT_CWU518 0x260B #define YUGA_PRODUCT_CWU516 0x260C #define YUGA_PRODUCT_CWU528 0x260D +#define YUGA_PRODUCT_CWU581 0x260E #define YUGA_PRODUCT_CWU526 0x260F - -#define YUGA_PRODUCT_CLM600 0x2601 -#define YUGA_PRODUCT_CLM610 0x2602 -#define YUGA_PRODUCT_CLM500 0x2603 -#define YUGA_PRODUCT_CLM510 0x2604 -#define YUGA_PRODUCT_CLM800 0x2605 -#define YUGA_PRODUCT_CLM900 0x2606 - -#define YUGA_PRODUCT_CLU718 0x2607 -#define YUGA_PRODUCT_CLU716 0x2608 -#define YUGA_PRODUCT_CLU728 0x2609 -#define YUGA_PRODUCT_CLU726 0x260A -#define YUGA_PRODUCT_CLU518 0x260B -#define YUGA_PRODUCT_CLU516 0x260C -#define YUGA_PRODUCT_CLU528 0x260D -#define YUGA_PRODUCT_CLU526 0x260F +#define YUGA_PRODUCT_CWU582 0x261F +#define YUGA_PRODUCT_CWU583 0x262F + +#define YUGA_PRODUCT_CLM600 0x3601 +#define YUGA_PRODUCT_CLM610 0x3602 +#define YUGA_PRODUCT_CLM500 0x3603 +#define YUGA_PRODUCT_CLM510 0x3604 +#define YUGA_PRODUCT_CLM800 0x3605 +#define YUGA_PRODUCT_CLM900 0x3606 + +#define YUGA_PRODUCT_CLU718 0x3607 +#define YUGA_PRODUCT_CLU716 0x3608 +#define YUGA_PRODUCT_CLU728 0x3609 +#define YUGA_PRODUCT_CLU726 0x360A +#define YUGA_PRODUCT_CLU518 0x360B +#define YUGA_PRODUCT_CLU516 0x360C +#define YUGA_PRODUCT_CLU528 0x360D +#define YUGA_PRODUCT_CLU526 0x360F /* Viettel products */ #define VIETTEL_VENDOR_ID 0x2262 @@ -666,6 +673,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t) &huawei_cdc12_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0x01, 0x31) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0x01, 0x32) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x31) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x32) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x33) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, @@ -1209,6 +1221,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU881) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU882) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU581) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU582) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU583) }, { USB_DEVICE_AND_INTERFACE_INFO(VIETTEL_VENDOR_ID, VIETTEL_PRODUCT_VT1000, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZD_VENDOR_ID, ZD_PRODUCT_7000, 0xff, 0xff, 0xff) }, { USB_DEVICE(LG_VENDOR_ID, LG_PRODUCT_L02C) }, /* docomo L-02C modem */ @@ -1257,7 +1274,7 @@ static struct usb_serial_driver option_1port_device = { .ioctl = usb_wwan_ioctl, .attach = usb_wwan_startup, .disconnect = usb_wwan_disconnect, - .release = usb_wwan_release, + .release = option_release, .read_int_callback = option_instat_callback, #ifdef CONFIG_PM .suspend = usb_wwan_suspend, @@ -1271,35 +1288,6 @@ static struct usb_serial_driver * const serial_drivers[] = { static bool debug; -/* per port private data */ - -#define N_IN_URB 4 -#define N_OUT_URB 4 -#define IN_BUFLEN 4096 -#define OUT_BUFLEN 4096 - -struct option_port_private { - /* Input endpoints and buffer for this port */ - struct urb *in_urbs[N_IN_URB]; - u8 *in_buffer[N_IN_URB]; - /* Output endpoints and buffer for this port */ - struct urb *out_urbs[N_OUT_URB]; - u8 *out_buffer[N_OUT_URB]; - unsigned long out_busy; /* Bit vector of URBs in use */ - int opened; - struct usb_anchor delayed; - - /* Settings for the port */ - int rts_state; /* Handshaking pins (outputs) */ - int dtr_state; - int cts_state; /* Handshaking pins (inputs) */ - int dsr_state; - int dcd_state; - int ri_state; - - unsigned long tx_start_time[N_OUT_URB]; -}; - module_usb_serial_driver(option_driver, serial_drivers); static bool is_blacklisted(const u8 ifnum, enum option_blacklist_reason reason, @@ -1368,12 +1356,22 @@ static int option_probe(struct usb_serial *serial, return 0; } +static void option_release(struct usb_serial *serial) +{ + struct usb_wwan_intf_private *priv = usb_get_serial_data(serial); + + usb_wwan_release(serial); + + kfree(priv); +} + static void option_instat_callback(struct urb *urb) { int err; int status = urb->status; struct usb_serial_port *port = urb->context; - struct option_port_private *portdata = usb_get_serial_port_data(port); + struct usb_wwan_port_private *portdata = + usb_get_serial_port_data(port); dbg("%s", __func__); dbg("%s: urb %p port %p has data %p", __func__, urb, port, portdata); @@ -1434,7 +1432,7 @@ static int option_send_setup(struct usb_serial_port *port) struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *intfdata = (struct usb_wwan_intf_private *) serial->private; - struct option_port_private *portdata; + struct usb_wwan_port_private *portdata; int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber; int val = 0; dbg("%s", __func__); diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 0206b10c9e6..50b5371719f 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -105,7 +105,13 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x1410, 0xa021)}, /* Novatel Gobi 3000 Composite */ {USB_DEVICE(0x413c, 0x8193)}, /* Dell Gobi 3000 QDL */ {USB_DEVICE(0x413c, 0x8194)}, /* Dell Gobi 3000 Composite */ + {USB_DEVICE(0x1199, 0x9010)}, /* Sierra Wireless Gobi 3000 QDL */ + {USB_DEVICE(0x1199, 0x9012)}, /* Sierra Wireless Gobi 3000 QDL */ {USB_DEVICE(0x1199, 0x9013)}, /* Sierra Wireless Gobi 3000 Modem device (MC8355) */ + {USB_DEVICE(0x1199, 0x9014)}, /* Sierra Wireless Gobi 3000 QDL */ + {USB_DEVICE(0x1199, 0x9015)}, /* Sierra Wireless Gobi 3000 Modem device */ + {USB_DEVICE(0x1199, 0x9018)}, /* Sierra Wireless Gobi 3000 QDL */ + {USB_DEVICE(0x1199, 0x9019)}, /* Sierra Wireless Gobi 3000 Modem device */ {USB_DEVICE(0x12D1, 0x14F0)}, /* Sony Gobi 3000 QDL */ {USB_DEVICE(0x12D1, 0x14F1)}, /* Sony Gobi 3000 Composite */ { } /* Terminating entry */ diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 8c8bf806f6f..449bf6d31e2 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -304,6 +304,10 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, + /* AT&T Direct IP LTE modems */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), + .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist + }, { USB_DEVICE(0x0f3d, 0x68A3), /* Airprime/Sierra Wireless Direct IP modems */ .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 6933355c964..bcf261778d0 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -670,12 +670,14 @@ exit: static struct usb_serial_driver *search_serial_device( struct usb_interface *iface) { - const struct usb_device_id *id; + const struct usb_device_id *id = NULL; struct usb_serial_driver *drv; + struct usb_driver *driver = to_usb_driver(iface->dev.driver); /* Check if the usb id matches a known device */ list_for_each_entry(drv, &usb_serial_driver_list, driver_list) { - id = get_iface_id(drv, iface); + if (drv->usb_driver == driver) + id = get_iface_id(drv, iface); if (id) return drv; } diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 8f3cbb8dc81..f4d3e1a8e7d 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1107,6 +1107,13 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +/* Feiya QDI U2 DISK, reported by Hans de Goede <hdegoede@redhat.com> */ +UNUSUAL_DEV( 0x090c, 0x1000, 0x0000, 0xffff, + "Feiya", + "QDI U2 DISK", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_READ_CAPACITY_16 ), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 7daf4b852d1..90effcccca9 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -56,7 +56,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; - unsigned int io_size; + u64 io_size; loff_t start; int offset; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9b1eb701a0..d60c2d870ab 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1861,6 +1861,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, struct nfs4_state *res; int status; + fmode &= FMODE_READ|FMODE_WRITE; do { status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); if (status == 0) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7f71c69cdcd..e79c24e232f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -862,7 +862,7 @@ static void free_session(struct kref *kref) struct nfsd4_session *ses; int mem; - BUG_ON(!spin_is_locked(&client_lock)); + lockdep_assert_held(&client_lock); ses = container_of(kref, struct nfsd4_session, se_ref); nfsd4_del_conns(ses); spin_lock(&nfsd_drc_lock); @@ -1041,7 +1041,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - BUG_ON(!spin_is_locked(&client_lock)); + lockdep_assert_held(&client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3cc3062b376..26574c72612 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/page-debug-flags.h> +#include <linux/uprobes.h> #include <asm/page.h> #include <asm/mmu.h> @@ -388,6 +389,7 @@ struct mm_struct { #ifdef CONFIG_CPUMASK_OFFSTACK struct cpumask cpumask_allocation; #endif + struct uprobes_state uprobes_state; }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/include/linux/pci.h b/include/linux/pci.h index e444f5b4911..8b2921ad51a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,6 +176,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Device causes system crash if in D3 during S3 sleep */ + PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { diff --git a/include/linux/sched.h b/include/linux/sched.h index 81a173c0897..6869c609703 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1617,6 +1617,9 @@ struct task_struct { #ifdef CONFIG_HAVE_HW_BREAKPOINT atomic_t ptrace_bp_refcnt; #endif +#ifdef CONFIG_UPROBES + struct uprobe_task *utask; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 792d16d9cbc..47ead515c81 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -9,13 +9,15 @@ * get good packing density in that tree, so the index should be dense in * the low-order bits. * - * We arrange the `type' and `offset' fields so that `type' is at the five + * We arrange the `type' and `offset' fields so that `type' is at the seven * high-order bits of the swp_entry_t and `offset' is right-aligned in the - * remaining bits. + * remaining bits. Although `type' itself needs only five bits, we allow for + * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) (sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT) +#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ + (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) #define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) /* diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h new file mode 100644 index 00000000000..efe4b3308c7 --- /dev/null +++ b/include/linux/uprobes.h @@ -0,0 +1,165 @@ +#ifndef _LINUX_UPROBES_H +#define _LINUX_UPROBES_H +/* + * User-space Probes (UProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2008-2012 + * Authors: + * Srikar Dronamraju + * Jim Keniston + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + */ + +#include <linux/errno.h> +#include <linux/rbtree.h> + +struct vm_area_struct; +struct mm_struct; +struct inode; + +#ifdef CONFIG_ARCH_SUPPORTS_UPROBES +# include <asm/uprobes.h> +#endif + +/* flags that denote/change uprobes behaviour */ + +/* Have a copy of original instruction */ +#define UPROBE_COPY_INSN 0x1 + +/* Dont run handlers when first register/ last unregister in progress*/ +#define UPROBE_RUN_HANDLER 0x2 +/* Can skip singlestep */ +#define UPROBE_SKIP_SSTEP 0x4 + +struct uprobe_consumer { + int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + /* + * filter is optional; If a filter exists, handler is run + * if and only if filter returns true. + */ + bool (*filter)(struct uprobe_consumer *self, struct task_struct *task); + + struct uprobe_consumer *next; +}; + +#ifdef CONFIG_UPROBES +enum uprobe_task_state { + UTASK_RUNNING, + UTASK_BP_HIT, + UTASK_SSTEP, + UTASK_SSTEP_ACK, + UTASK_SSTEP_TRAPPED, +}; + +/* + * uprobe_task: Metadata of a task while it singlesteps. + */ +struct uprobe_task { + enum uprobe_task_state state; + struct arch_uprobe_task autask; + + struct uprobe *active_uprobe; + + unsigned long xol_vaddr; + unsigned long vaddr; +}; + +/* + * On a breakpoint hit, thread contests for a slot. It frees the + * slot after singlestep. Currently a fixed number of slots are + * allocated. + */ +struct xol_area { + wait_queue_head_t wq; /* if all slots are busy */ + atomic_t slot_count; /* number of in-use slots */ + unsigned long *bitmap; /* 0 = free slot */ + struct page *page; + + /* + * We keep the vma's vm_start rather than a pointer to the vma + * itself. The probed process or a naughty kernel module could make + * the vma go away, and we must handle that reasonably gracefully. + */ + unsigned long vaddr; /* Page(s) of instruction slots */ +}; + +struct uprobes_state { + struct xol_area *xol_area; + atomic_t count; +}; +extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); +extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); +extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_mmap(struct vm_area_struct *vma); +extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_free_utask(struct task_struct *t); +extern void uprobe_copy_process(struct task_struct *t); +extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern int uprobe_post_sstep_notifier(struct pt_regs *regs); +extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); +extern void uprobe_notify_resume(struct pt_regs *regs); +extern bool uprobe_deny_signal(void); +extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); +extern void uprobe_clear_state(struct mm_struct *mm); +extern void uprobe_reset_state(struct mm_struct *mm); +#else /* !CONFIG_UPROBES */ +struct uprobes_state { +}; +static inline int +uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +{ + return -ENOSYS; +} +static inline void +uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +{ +} +static inline int uprobe_mmap(struct vm_area_struct *vma) +{ + return 0; +} +static inline void +uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ +} +static inline void uprobe_notify_resume(struct pt_regs *regs) +{ +} +static inline bool uprobe_deny_signal(void) +{ + return false; +} +static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return 0; +} +static inline void uprobe_free_utask(struct task_struct *t) +{ +} +static inline void uprobe_copy_process(struct task_struct *t) +{ +} +static inline void uprobe_clear_state(struct mm_struct *mm) +{ +} +static inline void uprobe_reset_state(struct mm_struct *mm) +{ +} +#endif /* !CONFIG_UPROBES */ +#endif /* _LINUX_UPROBES_H */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index d28cc78a38e..5de415707c2 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -126,8 +126,6 @@ struct usb_hcd { unsigned wireless:1; /* Wireless USB HCD */ unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ - unsigned broken_pci_sleep:1; /* Don't put the - controller in PCI-D3 for system sleep */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 22d901f9caf..103f5d147b2 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg endif obj-y := core.o ring_buffer.o callchain.o + obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_UPROBES) += uprobes.o + diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c new file mode 100644 index 00000000000..d9e5ba5a9b7 --- /dev/null +++ b/kernel/events/uprobes.c @@ -0,0 +1,1665 @@ +/* + * User-space Probes (UProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2008-2012 + * Authors: + * Srikar Dronamraju + * Jim Keniston + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> /* read_mapping_page */ +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/rmap.h> /* anon_vma_prepare */ +#include <linux/mmu_notifier.h> /* set_pte_at_notify */ +#include <linux/swap.h> /* try_to_free_swap */ +#include <linux/ptrace.h> /* user_enable_single_step */ +#include <linux/kdebug.h> /* notifier mechanism */ + +#include <linux/uprobes.h> +#include <linux/export.h> + +#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) +#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE + +static struct rb_root uprobes_tree = RB_ROOT; + +static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ + +#define UPROBES_HASH_SZ 13 + +/* + * We need separate register/unregister and mmap/munmap lock hashes because + * of mmap_sem nesting. + * + * uprobe_register() needs to install probes on (potentially) all processes + * and thus needs to acquire multiple mmap_sems (consequtively, not + * concurrently), whereas uprobe_mmap() is called while holding mmap_sem + * for the particular process doing the mmap. + * + * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem + * because of lock order against i_mmap_mutex. This means there's a hole in + * the register vma iteration where a mmap() can happen. + * + * Thus uprobe_register() can race with uprobe_mmap() and we can try and + * install a probe where one is already installed. + */ + +/* serialize (un)register */ +static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; + +#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) + +/* serialize uprobe->pending_list */ +static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; +#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) + +/* + * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe + * events active at this time. Probably a fine grained per inode count is + * better? + */ +static atomic_t uprobe_events = ATOMIC_INIT(0); + +struct uprobe { + struct rb_node rb_node; /* node in the rb tree */ + atomic_t ref; + struct rw_semaphore consumer_rwsem; + struct list_head pending_list; + struct uprobe_consumer *consumers; + struct inode *inode; /* Also hold a ref to inode */ + loff_t offset; + int flags; + struct arch_uprobe arch; +}; + +/* + * valid_vma: Verify if the specified vma is an executable vma + * Relax restrictions while unregistering: vm_flags might have + * changed after breakpoint was inserted. + * - is_register: indicates if we are in register context. + * - Return 1 if the specified virtual address is in an + * executable vma. + */ +static bool valid_vma(struct vm_area_struct *vma, bool is_register) +{ + if (!vma->vm_file) + return false; + + if (!is_register) + return true; + + if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) + == (VM_READ|VM_EXEC)) + return true; + + return false; +} + +static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) +{ + loff_t vaddr; + + vaddr = vma->vm_start + offset; + vaddr -= vma->vm_pgoff << PAGE_SHIFT; + + return vaddr; +} + +/** + * __replace_page - replace page in vma by new page. + * based on replace_page in mm/ksm.c + * + * @vma: vma that holds the pte pointing to page + * @page: the cowed page we are replacing by kpage + * @kpage: the modified page we replace page by + * + * Returns 0 on success, -EFAULT on failure. + */ +static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep; + + addr = page_address_in_vma(page, vma); + if (addr == -EFAULT) + return -EFAULT; + + ptep = page_check_address(page, mm, addr, &ptl, 0); + if (!ptep) + return -EAGAIN; + + get_page(kpage); + page_add_new_anon_rmap(kpage, vma, addr); + + if (!PageAnon(page)) { + dec_mm_counter(mm, MM_FILEPAGES); + inc_mm_counter(mm, MM_ANONPAGES); + } + + flush_cache_page(vma, addr, pte_pfn(*ptep)); + ptep_clear_flush(vma, addr, ptep); + set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); + + page_remove_rmap(page); + if (!page_mapped(page)) + try_to_free_swap(page); + put_page(page); + pte_unmap_unlock(ptep, ptl); + + return 0; +} + +/** + * is_swbp_insn - check if instruction is breakpoint instruction. + * @insn: instruction to be checked. + * Default implementation of is_swbp_insn + * Returns true if @insn is a breakpoint instruction. + */ +bool __weak is_swbp_insn(uprobe_opcode_t *insn) +{ + return *insn == UPROBE_SWBP_INSN; +} + +/* + * NOTE: + * Expect the breakpoint instruction to be the smallest size instruction for + * the architecture. If an arch has variable length instruction and the + * breakpoint instruction is not of the smallest length instruction + * supported by that architecture then we need to modify read_opcode / + * write_opcode accordingly. This would never be a problem for archs that + * have fixed length instructions. + */ + +/* + * write_opcode - write the opcode at a given virtual address. + * @auprobe: arch breakpointing information. + * @mm: the probed process address space. + * @vaddr: the virtual address to store the opcode. + * @opcode: opcode to be written at @vaddr. + * + * Called with mm->mmap_sem held (for read and with a reference to + * mm). + * + * For mm @mm, write the opcode at @vaddr. + * Return 0 (success) or a negative errno. + */ +static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long vaddr, uprobe_opcode_t opcode) +{ + struct page *old_page, *new_page; + struct address_space *mapping; + void *vaddr_old, *vaddr_new; + struct vm_area_struct *vma; + struct uprobe *uprobe; + int ret; +retry: + /* Read the page with vaddr into memory */ + ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); + if (ret <= 0) + return ret; + + ret = -EINVAL; + + /* + * We are interested in text pages only. Our pages of interest + * should be mapped for read and execute only. We desist from + * adding probes in write mapped pages since the breakpoints + * might end up in the file copy. + */ + if (!valid_vma(vma, is_swbp_insn(&opcode))) + goto put_out; + + uprobe = container_of(auprobe, struct uprobe, arch); + mapping = uprobe->inode->i_mapping; + if (mapping != vma->vm_file->f_mapping) + goto put_out; + + ret = -ENOMEM; + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); + if (!new_page) + goto put_out; + + __SetPageUptodate(new_page); + + /* + * lock page will serialize against do_wp_page()'s + * PageAnon() handling + */ + lock_page(old_page); + /* copy the page now that we've got it stable */ + vaddr_old = kmap_atomic(old_page); + vaddr_new = kmap_atomic(new_page); + + memcpy(vaddr_new, vaddr_old, PAGE_SIZE); + memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); + + kunmap_atomic(vaddr_new); + kunmap_atomic(vaddr_old); + + ret = anon_vma_prepare(vma); + if (ret) + goto unlock_out; + + lock_page(new_page); + ret = __replace_page(vma, old_page, new_page); + unlock_page(new_page); + +unlock_out: + unlock_page(old_page); + page_cache_release(new_page); + +put_out: + put_page(old_page); + + if (unlikely(ret == -EAGAIN)) + goto retry; + return ret; +} + +/** + * read_opcode - read the opcode at a given virtual address. + * @mm: the probed process address space. + * @vaddr: the virtual address to read the opcode. + * @opcode: location to store the read opcode. + * + * Called with mm->mmap_sem held (for read and with a reference to + * mm. + * + * For mm @mm, read the opcode at @vaddr and store it in @opcode. + * Return 0 (success) or a negative errno. + */ +static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode) +{ + struct page *page; + void *vaddr_new; + int ret; + + ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + if (ret <= 0) + return ret; + + lock_page(page); + vaddr_new = kmap_atomic(page); + vaddr &= ~PAGE_MASK; + memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); + kunmap_atomic(vaddr_new); + unlock_page(page); + + put_page(page); + + return 0; +} + +static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) +{ + uprobe_opcode_t opcode; + int result; + + if (current->mm == mm) { + pagefault_disable(); + result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, + sizeof(opcode)); + pagefault_enable(); + + if (likely(result == 0)) + goto out; + } + + result = read_opcode(mm, vaddr, &opcode); + if (result) + return result; +out: + if (is_swbp_insn(&opcode)) + return 1; + + return 0; +} + +/** + * set_swbp - store breakpoint at a given address. + * @auprobe: arch specific probepoint information. + * @mm: the probed process address space. + * @vaddr: the virtual address to insert the opcode. + * + * For mm @mm, store the breakpoint instruction at @vaddr. + * Return 0 (success) or a negative errno. + */ +int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) +{ + int result; + /* + * See the comment near uprobes_hash(). + */ + result = is_swbp_at_addr(mm, vaddr); + if (result == 1) + return -EEXIST; + + if (result) + return result; + + return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); +} + +/** + * set_orig_insn - Restore the original instruction. + * @mm: the probed process address space. + * @auprobe: arch specific probepoint information. + * @vaddr: the virtual address to insert the opcode. + * @verify: if true, verify existance of breakpoint instruction. + * + * For mm @mm, restore the original opcode (opcode) at @vaddr. + * Return 0 (success) or a negative errno. + */ +int __weak +set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify) +{ + if (verify) { + int result; + + result = is_swbp_at_addr(mm, vaddr); + if (!result) + return -EINVAL; + + if (result != 1) + return result; + } + return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); +} + +static int match_uprobe(struct uprobe *l, struct uprobe *r) +{ + if (l->inode < r->inode) + return -1; + + if (l->inode > r->inode) + return 1; + + if (l->offset < r->offset) + return -1; + + if (l->offset > r->offset) + return 1; + + return 0; +} + +static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) +{ + struct uprobe u = { .inode = inode, .offset = offset }; + struct rb_node *n = uprobes_tree.rb_node; + struct uprobe *uprobe; + int match; + + while (n) { + uprobe = rb_entry(n, struct uprobe, rb_node); + match = match_uprobe(&u, uprobe); + if (!match) { + atomic_inc(&uprobe->ref); + return uprobe; + } + + if (match < 0) + n = n->rb_left; + else + n = n->rb_right; + } + return NULL; +} + +/* + * Find a uprobe corresponding to a given inode:offset + * Acquires uprobes_treelock + */ +static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) +{ + struct uprobe *uprobe; + unsigned long flags; + + spin_lock_irqsave(&uprobes_treelock, flags); + uprobe = __find_uprobe(inode, offset); + spin_unlock_irqrestore(&uprobes_treelock, flags); + + return uprobe; +} + +static struct uprobe *__insert_uprobe(struct uprobe *uprobe) +{ + struct rb_node **p = &uprobes_tree.rb_node; + struct rb_node *parent = NULL; + struct uprobe *u; + int match; + + while (*p) { + parent = *p; + u = rb_entry(parent, struct uprobe, rb_node); + match = match_uprobe(uprobe, u); + if (!match) { + atomic_inc(&u->ref); + return u; + } + + if (match < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + + } + + u = NULL; + rb_link_node(&uprobe->rb_node, parent, p); + rb_insert_color(&uprobe->rb_node, &uprobes_tree); + /* get access + creation ref */ + atomic_set(&uprobe->ref, 2); + + return u; +} + +/* + * Acquire uprobes_treelock. + * Matching uprobe already exists in rbtree; + * increment (access refcount) and return the matching uprobe. + * + * No matching uprobe; insert the uprobe in rb_tree; + * get a double refcount (access + creation) and return NULL. + */ +static struct uprobe *insert_uprobe(struct uprobe *uprobe) +{ + unsigned long flags; + struct uprobe *u; + + spin_lock_irqsave(&uprobes_treelock, flags); + u = __insert_uprobe(uprobe); + spin_unlock_irqrestore(&uprobes_treelock, flags); + + /* For now assume that the instruction need not be single-stepped */ + uprobe->flags |= UPROBE_SKIP_SSTEP; + + return u; +} + +static void put_uprobe(struct uprobe *uprobe) +{ + if (atomic_dec_and_test(&uprobe->ref)) + kfree(uprobe); +} + +static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) +{ + struct uprobe *uprobe, *cur_uprobe; + + uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL); + if (!uprobe) + return NULL; + + uprobe->inode = igrab(inode); + uprobe->offset = offset; + init_rwsem(&uprobe->consumer_rwsem); + + /* add to uprobes_tree, sorted on inode:offset */ + cur_uprobe = insert_uprobe(uprobe); + + /* a uprobe exists for this inode:offset combination */ + if (cur_uprobe) { + kfree(uprobe); + uprobe = cur_uprobe; + iput(inode); + } else { + atomic_inc(&uprobe_events); + } + + return uprobe; +} + +static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) +{ + struct uprobe_consumer *uc; + + if (!(uprobe->flags & UPROBE_RUN_HANDLER)) + return; + + down_read(&uprobe->consumer_rwsem); + for (uc = uprobe->consumers; uc; uc = uc->next) { + if (!uc->filter || uc->filter(uc, current)) + uc->handler(uc, regs); + } + up_read(&uprobe->consumer_rwsem); +} + +/* Returns the previous consumer */ +static struct uprobe_consumer * +consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ + down_write(&uprobe->consumer_rwsem); + uc->next = uprobe->consumers; + uprobe->consumers = uc; + up_write(&uprobe->consumer_rwsem); + + return uc->next; +} + +/* + * For uprobe @uprobe, delete the consumer @uc. + * Return true if the @uc is deleted successfully + * or return false. + */ +static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ + struct uprobe_consumer **con; + bool ret = false; + + down_write(&uprobe->consumer_rwsem); + for (con = &uprobe->consumers; *con; con = &(*con)->next) { + if (*con == uc) { + *con = uc->next; + ret = true; + break; + } + } + up_write(&uprobe->consumer_rwsem); + + return ret; +} + +static int +__copy_insn(struct address_space *mapping, struct file *filp, char *insn, + unsigned long nbytes, loff_t offset) +{ + struct page *page; + void *vaddr; + unsigned long off; + pgoff_t idx; + + if (!filp) + return -EINVAL; + + if (!mapping->a_ops->readpage) + return -EIO; + + idx = offset >> PAGE_CACHE_SHIFT; + off = offset & ~PAGE_MASK; + + /* + * Ensure that the page that has the original instruction is + * populated and in page-cache. + */ + page = read_mapping_page(mapping, idx, filp); + if (IS_ERR(page)) + return PTR_ERR(page); + + vaddr = kmap_atomic(page); + memcpy(insn, vaddr + off, nbytes); + kunmap_atomic(vaddr); + page_cache_release(page); + + return 0; +} + +static int copy_insn(struct uprobe *uprobe, struct file *filp) +{ + struct address_space *mapping; + unsigned long nbytes; + int bytes; + + nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); + mapping = uprobe->inode->i_mapping; + + /* Instruction at end of binary; copy only available bytes */ + if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size) + bytes = uprobe->inode->i_size - uprobe->offset; + else + bytes = MAX_UINSN_BYTES; + + /* Instruction at the page-boundary; copy bytes in second page */ + if (nbytes < bytes) { + int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, + bytes - nbytes, uprobe->offset + nbytes); + if (err) + return err; + bytes = nbytes; + } + return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); +} + +/* + * How mm->uprobes_state.count gets updated + * uprobe_mmap() increments the count if + * - it successfully adds a breakpoint. + * - it cannot add a breakpoint, but sees that there is a underlying + * breakpoint (via a is_swbp_at_addr()). + * + * uprobe_munmap() decrements the count if + * - it sees a underlying breakpoint, (via is_swbp_at_addr) + * (Subsequent uprobe_unregister wouldnt find the breakpoint + * unless a uprobe_mmap kicks in, since the old vma would be + * dropped just after uprobe_munmap.) + * + * uprobe_register increments the count if: + * - it successfully adds a breakpoint. + * + * uprobe_unregister decrements the count if: + * - it sees a underlying breakpoint and removes successfully. + * (via is_swbp_at_addr) + * (Subsequent uprobe_munmap wouldnt find the breakpoint + * since there is no underlying breakpoint after the + * breakpoint removal.) + */ +static int +install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long vaddr) +{ + int ret; + + /* + * If probe is being deleted, unregister thread could be done with + * the vma-rmap-walk through. Adding a probe now can be fatal since + * nobody will be able to cleanup. Also we could be from fork or + * mremap path, where the probe might have already been inserted. + * Hence behave as if probe already existed. + */ + if (!uprobe->consumers) + return -EEXIST; + + if (!(uprobe->flags & UPROBE_COPY_INSN)) { + ret = copy_insn(uprobe, vma->vm_file); + if (ret) + return ret; + + if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) + return -ENOTSUPP; + + ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); + if (ret) + return ret; + + /* write_opcode() assumes we don't cross page boundary */ + BUG_ON((uprobe->offset & ~PAGE_MASK) + + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + + uprobe->flags |= UPROBE_COPY_INSN; + } + + /* + * Ideally, should be updating the probe count after the breakpoint + * has been successfully inserted. However a thread could hit the + * breakpoint we just inserted even before the probe count is + * incremented. If this is the first breakpoint placed, breakpoint + * notifier might ignore uprobes and pass the trap to the thread. + * Hence increment before and decrement on failure. + */ + atomic_inc(&mm->uprobes_state.count); + ret = set_swbp(&uprobe->arch, mm, vaddr); + if (ret) + atomic_dec(&mm->uprobes_state.count); + + return ret; +} + +static void +remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) +{ + if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) + atomic_dec(&mm->uprobes_state.count); +} + +/* + * There could be threads that have already hit the breakpoint. They + * will recheck the current insn and restart if find_uprobe() fails. + * See find_active_uprobe(). + */ +static void delete_uprobe(struct uprobe *uprobe) +{ + unsigned long flags; + + spin_lock_irqsave(&uprobes_treelock, flags); + rb_erase(&uprobe->rb_node, &uprobes_tree); + spin_unlock_irqrestore(&uprobes_treelock, flags); + iput(uprobe->inode); + put_uprobe(uprobe); + atomic_dec(&uprobe_events); +} + +struct map_info { + struct map_info *next; + struct mm_struct *mm; + unsigned long vaddr; +}; + +static inline struct map_info *free_map_info(struct map_info *info) +{ + struct map_info *next = info->next; + kfree(info); + return next; +} + +static struct map_info * +build_map_info(struct address_space *mapping, loff_t offset, bool is_register) +{ + unsigned long pgoff = offset >> PAGE_SHIFT; + struct prio_tree_iter iter; + struct vm_area_struct *vma; + struct map_info *curr = NULL; + struct map_info *prev = NULL; + struct map_info *info; + int more = 0; + + again: + mutex_lock(&mapping->i_mmap_mutex); + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + if (!valid_vma(vma, is_register)) + continue; + + if (!prev && !more) { + /* + * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through + * reclaim. This is optimistic, no harm done if it fails. + */ + prev = kmalloc(sizeof(struct map_info), + GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (prev) + prev->next = NULL; + } + if (!prev) { + more++; + continue; + } + + if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) + continue; + + info = prev; + prev = prev->next; + info->next = curr; + curr = info; + + info->mm = vma->vm_mm; + info->vaddr = vma_address(vma, offset); + } + mutex_unlock(&mapping->i_mmap_mutex); + + if (!more) + goto out; + + prev = curr; + while (curr) { + mmput(curr->mm); + curr = curr->next; + } + + do { + info = kmalloc(sizeof(struct map_info), GFP_KERNEL); + if (!info) { + curr = ERR_PTR(-ENOMEM); + goto out; + } + info->next = prev; + prev = info; + } while (--more); + + goto again; + out: + while (prev) + prev = free_map_info(prev); + return curr; +} + +static int register_for_each_vma(struct uprobe *uprobe, bool is_register) +{ + struct map_info *info; + int err = 0; + + info = build_map_info(uprobe->inode->i_mapping, + uprobe->offset, is_register); + if (IS_ERR(info)) + return PTR_ERR(info); + + while (info) { + struct mm_struct *mm = info->mm; + struct vm_area_struct *vma; + + if (err) + goto free; + + down_write(&mm->mmap_sem); + vma = find_vma(mm, (unsigned long)info->vaddr); + if (!vma || !valid_vma(vma, is_register)) + goto unlock; + + if (vma->vm_file->f_mapping->host != uprobe->inode || + vma_address(vma, uprobe->offset) != info->vaddr) + goto unlock; + + if (is_register) { + err = install_breakpoint(uprobe, mm, vma, info->vaddr); + /* + * We can race against uprobe_mmap(), see the + * comment near uprobe_hash(). + */ + if (err == -EEXIST) + err = 0; + } else { + remove_breakpoint(uprobe, mm, info->vaddr); + } + unlock: + up_write(&mm->mmap_sem); + free: + mmput(mm); + info = free_map_info(info); + } + + return err; +} + +static int __uprobe_register(struct uprobe *uprobe) +{ + return register_for_each_vma(uprobe, true); +} + +static void __uprobe_unregister(struct uprobe *uprobe) +{ + if (!register_for_each_vma(uprobe, false)) + delete_uprobe(uprobe); + + /* TODO : cant unregister? schedule a worker thread */ +} + +/* + * uprobe_register - register a probe + * @inode: the file in which the probe has to be placed. + * @offset: offset from the start of the file. + * @uc: information on howto handle the probe.. + * + * Apart from the access refcount, uprobe_register() takes a creation + * refcount (thro alloc_uprobe) if and only if this @uprobe is getting + * inserted into the rbtree (i.e first consumer for a @inode:@offset + * tuple). Creation refcount stops uprobe_unregister from freeing the + * @uprobe even before the register operation is complete. Creation + * refcount is released when the last @uc for the @uprobe + * unregisters. + * + * Return errno if it cannot successully install probes + * else return 0 (success) + */ +int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +{ + struct uprobe *uprobe; + int ret; + + if (!inode || !uc || uc->next) + return -EINVAL; + + if (offset > i_size_read(inode)) + return -EINVAL; + + ret = 0; + mutex_lock(uprobes_hash(inode)); + uprobe = alloc_uprobe(inode, offset); + + if (uprobe && !consumer_add(uprobe, uc)) { + ret = __uprobe_register(uprobe); + if (ret) { + uprobe->consumers = NULL; + __uprobe_unregister(uprobe); + } else { + uprobe->flags |= UPROBE_RUN_HANDLER; + } + } + + mutex_unlock(uprobes_hash(inode)); + put_uprobe(uprobe); + + return ret; +} +EXPORT_SYMBOL_GPL(uprobe_register); + +/* + * uprobe_unregister - unregister a already registered probe. + * @inode: the file in which the probe has to be removed. + * @offset: offset from the start of the file. + * @uc: identify which probe if multiple probes are colocated. + */ +void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +{ + struct uprobe *uprobe; + + if (!inode || !uc) + return; + + uprobe = find_uprobe(inode, offset); + if (!uprobe) + return; + + mutex_lock(uprobes_hash(inode)); + + if (consumer_del(uprobe, uc)) { + if (!uprobe->consumers) { + __uprobe_unregister(uprobe); + uprobe->flags &= ~UPROBE_RUN_HANDLER; + } + } + + mutex_unlock(uprobes_hash(inode)); + if (uprobe) + put_uprobe(uprobe); +} +EXPORT_SYMBOL_GPL(uprobe_unregister); + +/* + * Of all the nodes that correspond to the given inode, return the node + * with the least offset. + */ +static struct rb_node *find_least_offset_node(struct inode *inode) +{ + struct uprobe u = { .inode = inode, .offset = 0}; + struct rb_node *n = uprobes_tree.rb_node; + struct rb_node *close_node = NULL; + struct uprobe *uprobe; + int match; + + while (n) { + uprobe = rb_entry(n, struct uprobe, rb_node); + match = match_uprobe(&u, uprobe); + + if (uprobe->inode == inode) + close_node = n; + + if (!match) + return close_node; + + if (match < 0) + n = n->rb_left; + else + n = n->rb_right; + } + + return close_node; +} + +/* + * For a given inode, build a list of probes that need to be inserted. + */ +static void build_probe_list(struct inode *inode, struct list_head *head) +{ + struct uprobe *uprobe; + unsigned long flags; + struct rb_node *n; + + spin_lock_irqsave(&uprobes_treelock, flags); + + n = find_least_offset_node(inode); + + for (; n; n = rb_next(n)) { + uprobe = rb_entry(n, struct uprobe, rb_node); + if (uprobe->inode != inode) + break; + + list_add(&uprobe->pending_list, head); + atomic_inc(&uprobe->ref); + } + + spin_unlock_irqrestore(&uprobes_treelock, flags); +} + +/* + * Called from mmap_region. + * called with mm->mmap_sem acquired. + * + * Return -ve no if we fail to insert probes and we cannot + * bail-out. + * Return 0 otherwise. i.e: + * + * - successful insertion of probes + * - (or) no possible probes to be inserted. + * - (or) insertion of probes failed but we can bail-out. + */ +int uprobe_mmap(struct vm_area_struct *vma) +{ + struct list_head tmp_list; + struct uprobe *uprobe; + struct inode *inode; + int ret, count; + + if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) + return 0; + + inode = vma->vm_file->f_mapping->host; + if (!inode) + return 0; + + INIT_LIST_HEAD(&tmp_list); + mutex_lock(uprobes_mmap_hash(inode)); + build_probe_list(inode, &tmp_list); + + ret = 0; + count = 0; + + list_for_each_entry(uprobe, &tmp_list, pending_list) { + if (!ret) { + loff_t vaddr = vma_address(vma, uprobe->offset); + + if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { + put_uprobe(uprobe); + continue; + } + + ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); + /* + * We can race against uprobe_register(), see the + * comment near uprobe_hash(). + */ + if (ret == -EEXIST) { + ret = 0; + + if (!is_swbp_at_addr(vma->vm_mm, vaddr)) + continue; + + /* + * Unable to insert a breakpoint, but + * breakpoint lies underneath. Increment the + * probe count. + */ + atomic_inc(&vma->vm_mm->uprobes_state.count); + } + + if (!ret) + count++; + } + put_uprobe(uprobe); + } + + mutex_unlock(uprobes_mmap_hash(inode)); + + if (ret) + atomic_sub(count, &vma->vm_mm->uprobes_state.count); + + return ret; +} + +/* + * Called in context of a munmap of a vma. + */ +void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + struct list_head tmp_list; + struct uprobe *uprobe; + struct inode *inode; + + if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) + return; + + if (!atomic_read(&vma->vm_mm->uprobes_state.count)) + return; + + inode = vma->vm_file->f_mapping->host; + if (!inode) + return; + + INIT_LIST_HEAD(&tmp_list); + mutex_lock(uprobes_mmap_hash(inode)); + build_probe_list(inode, &tmp_list); + + list_for_each_entry(uprobe, &tmp_list, pending_list) { + loff_t vaddr = vma_address(vma, uprobe->offset); + + if (vaddr >= start && vaddr < end) { + /* + * An unregister could have removed the probe before + * unmap. So check before we decrement the count. + */ + if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) + atomic_dec(&vma->vm_mm->uprobes_state.count); + } + put_uprobe(uprobe); + } + mutex_unlock(uprobes_mmap_hash(inode)); +} + +/* Slot allocation for XOL */ +static int xol_add_vma(struct xol_area *area) +{ + struct mm_struct *mm; + int ret; + + area->page = alloc_page(GFP_HIGHUSER); + if (!area->page) + return -ENOMEM; + + ret = -EALREADY; + mm = current->mm; + + down_write(&mm->mmap_sem); + if (mm->uprobes_state.xol_area) + goto fail; + + ret = -ENOMEM; + + /* Try to map as high as possible, this is only a hint. */ + area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); + if (area->vaddr & ~PAGE_MASK) { + ret = area->vaddr; + goto fail; + } + + ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, + VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page); + if (ret) + goto fail; + + smp_wmb(); /* pairs with get_xol_area() */ + mm->uprobes_state.xol_area = area; + ret = 0; + +fail: + up_write(&mm->mmap_sem); + if (ret) + __free_page(area->page); + + return ret; +} + +static struct xol_area *get_xol_area(struct mm_struct *mm) +{ + struct xol_area *area; + + area = mm->uprobes_state.xol_area; + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + + return area; +} + +/* + * xol_alloc_area - Allocate process's xol_area. + * This area will be used for storing instructions for execution out of + * line. + * + * Returns the allocated area or NULL. + */ +static struct xol_area *xol_alloc_area(void) +{ + struct xol_area *area; + + area = kzalloc(sizeof(*area), GFP_KERNEL); + if (unlikely(!area)) + return NULL; + + area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); + + if (!area->bitmap) + goto fail; + + init_waitqueue_head(&area->wq); + if (!xol_add_vma(area)) + return area; + +fail: + kfree(area->bitmap); + kfree(area); + + return get_xol_area(current->mm); +} + +/* + * uprobe_clear_state - Free the area allocated for slots. + */ +void uprobe_clear_state(struct mm_struct *mm) +{ + struct xol_area *area = mm->uprobes_state.xol_area; + + if (!area) + return; + + put_page(area->page); + kfree(area->bitmap); + kfree(area); +} + +/* + * uprobe_reset_state - Free the area allocated for slots. + */ +void uprobe_reset_state(struct mm_struct *mm) +{ + mm->uprobes_state.xol_area = NULL; + atomic_set(&mm->uprobes_state.count, 0); +} + +/* + * - search for a free slot. + */ +static unsigned long xol_take_insn_slot(struct xol_area *area) +{ + unsigned long slot_addr; + int slot_nr; + + do { + slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE); + if (slot_nr < UINSNS_PER_PAGE) { + if (!test_and_set_bit(slot_nr, area->bitmap)) + break; + + slot_nr = UINSNS_PER_PAGE; + continue; + } + wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE)); + } while (slot_nr >= UINSNS_PER_PAGE); + + slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES); + atomic_inc(&area->slot_count); + + return slot_addr; +} + +/* + * xol_get_insn_slot - If was not allocated a slot, then + * allocate a slot. + * Returns the allocated slot address or 0. + */ +static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr) +{ + struct xol_area *area; + unsigned long offset; + void *vaddr; + + area = get_xol_area(current->mm); + if (!area) { + area = xol_alloc_area(); + if (!area) + return 0; + } + current->utask->xol_vaddr = xol_take_insn_slot(area); + + /* + * Initialize the slot if xol_vaddr points to valid + * instruction slot. + */ + if (unlikely(!current->utask->xol_vaddr)) + return 0; + + current->utask->vaddr = slot_addr; + offset = current->utask->xol_vaddr & ~PAGE_MASK; + vaddr = kmap_atomic(area->page); + memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); + kunmap_atomic(vaddr); + + return current->utask->xol_vaddr; +} + +/* + * xol_free_insn_slot - If slot was earlier allocated by + * @xol_get_insn_slot(), make the slot available for + * subsequent requests. + */ +static void xol_free_insn_slot(struct task_struct *tsk) +{ + struct xol_area *area; + unsigned long vma_end; + unsigned long slot_addr; + + if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask) + return; + + slot_addr = tsk->utask->xol_vaddr; + + if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr))) + return; + + area = tsk->mm->uprobes_state.xol_area; + vma_end = area->vaddr + PAGE_SIZE; + if (area->vaddr <= slot_addr && slot_addr < vma_end) { + unsigned long offset; + int slot_nr; + + offset = slot_addr - area->vaddr; + slot_nr = offset / UPROBE_XOL_SLOT_BYTES; + if (slot_nr >= UINSNS_PER_PAGE) + return; + + clear_bit(slot_nr, area->bitmap); + atomic_dec(&area->slot_count); + if (waitqueue_active(&area->wq)) + wake_up(&area->wq); + + tsk->utask->xol_vaddr = 0; + } +} + +/** + * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs + * @regs: Reflects the saved state of the task after it has hit a breakpoint + * instruction. + * Return the address of the breakpoint instruction. + */ +unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; +} + +/* + * Called with no locks held. + * Called in context of a exiting or a exec-ing thread. + */ +void uprobe_free_utask(struct task_struct *t) +{ + struct uprobe_task *utask = t->utask; + + if (!utask) + return; + + if (utask->active_uprobe) + put_uprobe(utask->active_uprobe); + + xol_free_insn_slot(t); + kfree(utask); + t->utask = NULL; +} + +/* + * Called in context of a new clone/fork from copy_process. + */ +void uprobe_copy_process(struct task_struct *t) +{ + t->utask = NULL; +} + +/* + * Allocate a uprobe_task object for the task. + * Called when the thread hits a breakpoint for the first time. + * + * Returns: + * - pointer to new uprobe_task on success + * - NULL otherwise + */ +static struct uprobe_task *add_utask(void) +{ + struct uprobe_task *utask; + + utask = kzalloc(sizeof *utask, GFP_KERNEL); + if (unlikely(!utask)) + return NULL; + + current->utask = utask; + return utask; +} + +/* Prepare to single-step probed instruction out of line. */ +static int +pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr) +{ + if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs)) + return 0; + + return -EFAULT; +} + +/* + * If we are singlestepping, then ensure this thread is not connected to + * non-fatal signals until completion of singlestep. When xol insn itself + * triggers the signal, restart the original insn even if the task is + * already SIGKILL'ed (since coredump should report the correct ip). This + * is even more important if the task has a handler for SIGSEGV/etc, The + * _same_ instruction should be repeated again after return from the signal + * handler, and SSTEP can never finish in this case. + */ +bool uprobe_deny_signal(void) +{ + struct task_struct *t = current; + struct uprobe_task *utask = t->utask; + + if (likely(!utask || !utask->active_uprobe)) + return false; + + WARN_ON_ONCE(utask->state != UTASK_SSTEP); + + if (signal_pending(t)) { + spin_lock_irq(&t->sighand->siglock); + clear_tsk_thread_flag(t, TIF_SIGPENDING); + spin_unlock_irq(&t->sighand->siglock); + + if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { + utask->state = UTASK_SSTEP_TRAPPED; + set_tsk_thread_flag(t, TIF_UPROBE); + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + } + } + + return true; +} + +/* + * Avoid singlestepping the original instruction if the original instruction + * is a NOP or can be emulated. + */ +static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) +{ + if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) + return true; + + uprobe->flags &= ~UPROBE_SKIP_SSTEP; + return false; +} + +static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) +{ + struct mm_struct *mm = current->mm; + struct uprobe *uprobe = NULL; + struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, bp_vaddr); + if (vma && vma->vm_start <= bp_vaddr) { + if (valid_vma(vma, false)) { + struct inode *inode; + loff_t offset; + + inode = vma->vm_file->f_mapping->host; + offset = bp_vaddr - vma->vm_start; + offset += (vma->vm_pgoff << PAGE_SHIFT); + uprobe = find_uprobe(inode, offset); + } + + if (!uprobe) + *is_swbp = is_swbp_at_addr(mm, bp_vaddr); + } else { + *is_swbp = -EFAULT; + } + up_read(&mm->mmap_sem); + + return uprobe; +} + +/* + * Run handler and ask thread to singlestep. + * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. + */ +static void handle_swbp(struct pt_regs *regs) +{ + struct uprobe_task *utask; + struct uprobe *uprobe; + unsigned long bp_vaddr; + int uninitialized_var(is_swbp); + + bp_vaddr = uprobe_get_swbp_addr(regs); + uprobe = find_active_uprobe(bp_vaddr, &is_swbp); + + if (!uprobe) { + if (is_swbp > 0) { + /* No matching uprobe; signal SIGTRAP. */ + send_sig(SIGTRAP, current, 0); + } else { + /* + * Either we raced with uprobe_unregister() or we can't + * access this memory. The latter is only possible if + * another thread plays with our ->mm. In both cases + * we can simply restart. If this vma was unmapped we + * can pretend this insn was not executed yet and get + * the (correct) SIGSEGV after restart. + */ + instruction_pointer_set(regs, bp_vaddr); + } + return; + } + + utask = current->utask; + if (!utask) { + utask = add_utask(); + /* Cannot allocate; re-execute the instruction. */ + if (!utask) + goto cleanup_ret; + } + utask->active_uprobe = uprobe; + handler_chain(uprobe, regs); + if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs)) + goto cleanup_ret; + + utask->state = UTASK_SSTEP; + if (!pre_ssout(uprobe, regs, bp_vaddr)) { + user_enable_single_step(current); + return; + } + +cleanup_ret: + if (utask) { + utask->active_uprobe = NULL; + utask->state = UTASK_RUNNING; + } + if (uprobe) { + if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) + + /* + * cannot singlestep; cannot skip instruction; + * re-execute the instruction. + */ + instruction_pointer_set(regs, bp_vaddr); + + put_uprobe(uprobe); + } +} + +/* + * Perform required fix-ups and disable singlestep. + * Allow pending signals to take effect. + */ +static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) +{ + struct uprobe *uprobe; + + uprobe = utask->active_uprobe; + if (utask->state == UTASK_SSTEP_ACK) + arch_uprobe_post_xol(&uprobe->arch, regs); + else if (utask->state == UTASK_SSTEP_TRAPPED) + arch_uprobe_abort_xol(&uprobe->arch, regs); + else + WARN_ON_ONCE(1); + + put_uprobe(uprobe); + utask->active_uprobe = NULL; + utask->state = UTASK_RUNNING; + user_disable_single_step(current); + xol_free_insn_slot(current); + + spin_lock_irq(¤t->sighand->siglock); + recalc_sigpending(); /* see uprobe_deny_signal() */ + spin_unlock_irq(¤t->sighand->siglock); +} + +/* + * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag. (and on + * subsequent probe hits on the thread sets the state to UTASK_BP_HIT) and + * allows the thread to return from interrupt. + * + * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag and + * also sets the state to UTASK_SSTEP_ACK and allows the thread to return from + * interrupt. + * + * While returning to userspace, thread notices the TIF_UPROBE flag and calls + * uprobe_notify_resume(). + */ +void uprobe_notify_resume(struct pt_regs *regs) +{ + struct uprobe_task *utask; + + utask = current->utask; + if (!utask || utask->state == UTASK_BP_HIT) + handle_swbp(regs); + else + handle_singlestep(utask, regs); +} + +/* + * uprobe_pre_sstep_notifier gets called from interrupt context as part of + * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit. + */ +int uprobe_pre_sstep_notifier(struct pt_regs *regs) +{ + struct uprobe_task *utask; + + if (!current->mm || !atomic_read(¤t->mm->uprobes_state.count)) + /* task is currently not uprobed */ + return 0; + + utask = current->utask; + if (utask) + utask->state = UTASK_BP_HIT; + + set_thread_flag(TIF_UPROBE); + + return 1; +} + +/* + * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier + * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep. + */ +int uprobe_post_sstep_notifier(struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + if (!current->mm || !utask || !utask->active_uprobe) + /* task is currently not uprobed */ + return 0; + + utask->state = UTASK_SSTEP_ACK; + set_thread_flag(TIF_UPROBE); + return 1; +} + +static struct notifier_block uprobe_exception_nb = { + .notifier_call = arch_uprobe_exception_notify, + .priority = INT_MAX-1, /* notified after kprobes, kgdb */ +}; + +static int __init init_uprobes(void) +{ + int i; + + for (i = 0; i < UPROBES_HASH_SZ; i++) { + mutex_init(&uprobes_mutex[i]); + mutex_init(&uprobes_mmap_mutex[i]); + } + + return register_die_notifier(&uprobe_exception_nb); +} +module_init(init_uprobes); + +static void __exit exit_uprobes(void) +{ +} +module_exit(exit_uprobes); diff --git a/kernel/fork.c b/kernel/fork.c index c3eafd82441..5b87e9f84dd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -68,6 +68,7 @@ #include <linux/oom.h> #include <linux/khugepaged.h> #include <linux/signalfd.h> +#include <linux/uprobes.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -423,6 +424,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; + + if (file && uprobe_mmap(tmp)) + goto out; } /* a new mm has just been created */ arch_dup_mmap(oldmm, mm); @@ -571,6 +575,7 @@ void mmput(struct mm_struct *mm) might_sleep(); if (atomic_dec_and_test(&mm->mm_users)) { + uprobe_clear_state(mm); exit_aio(mm); ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ @@ -749,6 +754,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) exit_pi_state_list(tsk); #endif + uprobe_free_utask(tsk); + /* Get rid of any cached register state */ deactivate_mm(tsk, mm); @@ -803,6 +810,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif + uprobe_reset_state(mm); if (!mm_init(mm, tsk)) goto fail_nomem; @@ -1344,6 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif + uprobe_copy_process(p); /* * sigaltstack should be cleared when sharing the same VM */ diff --git a/kernel/panic.c b/kernel/panic.c index 8ed89a175d7..9ed023b8333 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -108,8 +108,6 @@ void panic(const char *fmt, ...) */ crash_kexec(NULL); - kmsg_dump(KMSG_DUMP_PANIC); - /* * Note smp_send_stop is the usual smp shutdown function, which * unfortunately means it may not be hardened to work in a panic @@ -117,6 +115,8 @@ void panic(const char *fmt, ...) */ smp_send_stop(); + kmsg_dump(KMSG_DUMP_PANIC); + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); bust_spinlocks(0); diff --git a/kernel/signal.c b/kernel/signal.c index 17afcaf582d..60d80ab2601 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -29,6 +29,7 @@ #include <linux/pid_namespace.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> +#include <linux/uprobes.h> #define CREATE_TRACE_POINTS #include <trace/events/signal.h> @@ -2202,6 +2203,9 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct signal_struct *signal = current->signal; int signr; + if (unlikely(uprobe_deny_signal())) + return 0; + relock: /* * We'll jump back here after any time we were stopped in TASK_STOPPED. diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index f03fd83b170..e8c867173ae 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -412,6 +412,7 @@ int second_overflow(unsigned long secs) if (secs % 86400 == 0) { leap = -1; time_state = TIME_OOP; + time_tai++; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } @@ -426,7 +427,6 @@ int second_overflow(unsigned long secs) } break; case TIME_OOP: - time_tai++; time_state = TIME_WAIT; break; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a1d2849f247..ea4bff6295f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -373,6 +373,7 @@ config KPROBE_EVENT depends on HAVE_REGS_AND_STACK_ACCESS_API bool "Enable kprobes-based dynamic events" select TRACING + select PROBE_EVENTS default y help This allows the user to add tracing events (similar to tracepoints) @@ -385,6 +386,25 @@ config KPROBE_EVENT This option is also required by perf-probe subcommand of perf tools. If you want to use perf tools, this option is strongly recommended. +config UPROBE_EVENT + bool "Enable uprobes-based dynamic events" + depends on ARCH_SUPPORTS_UPROBES + depends on MMU + select UPROBES + select PROBE_EVENTS + select TRACING + default n + help + This allows the user to add tracing events on top of userspace + dynamic events (similar to tracepoints) on the fly via the trace + events interface. Those events can be inserted wherever uprobes + can probe, and record various registers. + This option is required if you plan to use perf-probe subcommand + of perf tools on user space applications. + +config PROBE_EVENTS + def_bool n + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 5f39a07fe5e..1734c03e048 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -61,5 +61,7 @@ endif ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif +obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o +obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a22255c101..464a96f3d4c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -383,7 +383,7 @@ EXPORT_SYMBOL_GPL(tracing_on); void tracing_off(void) { if (global_trace.buffer) - ring_buffer_record_on(global_trace.buffer); + ring_buffer_record_off(global_trace.buffer); /* * This flag is only looked at when buffers haven't been * allocated yet. We don't really care about the race diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f95d65da6db..a6bf7057116 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -103,6 +103,11 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; +struct uprobe_trace_entry_head { + struct trace_entry ent; + unsigned long ip; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 580a05ec926..b31d3d5699f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -19,547 +19,15 @@ #include <linux/module.h> #include <linux/uaccess.h> -#include <linux/kprobes.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/debugfs.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/ptrace.h> -#include <linux/perf_event.h> -#include <linux/stringify.h> -#include <linux/limits.h> -#include <asm/bitsperlong.h> - -#include "trace.h" -#include "trace_output.h" - -#define MAX_TRACE_ARGS 128 -#define MAX_ARGSTR_LEN 63 -#define MAX_EVENT_NAME_LEN 64 -#define MAX_STRING_SIZE PATH_MAX -#define KPROBE_EVENT_SYSTEM "kprobes" - -/* Reserved field names */ -#define FIELD_STRING_IP "__probe_ip" -#define FIELD_STRING_RETIP "__probe_ret_ip" -#define FIELD_STRING_FUNC "__probe_func" - -const char *reserved_field_names[] = { - "common_type", - "common_flags", - "common_preempt_count", - "common_pid", - "common_tgid", - FIELD_STRING_IP, - FIELD_STRING_RETIP, - FIELD_STRING_FUNC, -}; - -/* Printing function type */ -typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, - void *); -#define PRINT_TYPE_FUNC_NAME(type) print_type_##type -#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type - -/* Printing in basic type function template */ -#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ -static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ - const char *name, \ - void *data, void *ent)\ -{ \ - return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ -} \ -static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; - -DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) -DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) -DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) -DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) -DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) -DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) -DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) -DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) - -/* data_rloc: data relative location, compatible with u32 */ -#define make_data_rloc(len, roffs) \ - (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) -#define get_rloc_len(dl) ((u32)(dl) >> 16) -#define get_rloc_offs(dl) ((u32)(dl) & 0xffff) - -static inline void *get_rloc_data(u32 *dl) -{ - return (u8 *)dl + get_rloc_offs(*dl); -} - -/* For data_loc conversion */ -static inline void *get_loc_data(u32 *dl, void *ent) -{ - return (u8 *)ent + get_rloc_offs(*dl); -} - -/* - * Convert data_rloc to data_loc: - * data_rloc stores the offset from data_rloc itself, but data_loc - * stores the offset from event entry. - */ -#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) - -/* For defining macros, define string/string_size types */ -typedef u32 string; -typedef u32 string_size; - -/* Print type function for string type */ -static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, - const char *name, - void *data, void *ent) -{ - int len = *(u32 *)data >> 16; - - if (!len) - return trace_seq_printf(s, " %s=(fault)", name); - else - return trace_seq_printf(s, " %s=\"%s\"", name, - (const char *)get_loc_data(data, ent)); -} -static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; - -/* Data fetch function type */ -typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); - -struct fetch_param { - fetch_func_t fn; - void *data; -}; - -static __kprobes void call_fetch(struct fetch_param *fprm, - struct pt_regs *regs, void *dest) -{ - return fprm->fn(regs, fprm->data, dest); -} - -#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type -/* - * Define macro for basic types - we don't need to define s* types, because - * we have to care only about bitwidth at recording time. - */ -#define DEFINE_BASIC_FETCH_FUNCS(method) \ -DEFINE_FETCH_##method(u8) \ -DEFINE_FETCH_##method(u16) \ -DEFINE_FETCH_##method(u32) \ -DEFINE_FETCH_##method(u64) - -#define CHECK_FETCH_FUNCS(method, fn) \ - (((FETCH_FUNC_NAME(method, u8) == fn) || \ - (FETCH_FUNC_NAME(method, u16) == fn) || \ - (FETCH_FUNC_NAME(method, u32) == fn) || \ - (FETCH_FUNC_NAME(method, u64) == fn) || \ - (FETCH_FUNC_NAME(method, string) == fn) || \ - (FETCH_FUNC_NAME(method, string_size) == fn)) \ - && (fn != NULL)) - -/* Data fetch function templates */ -#define DEFINE_FETCH_reg(type) \ -static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ - void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_register(regs, \ - (unsigned int)((unsigned long)offset)); \ -} -DEFINE_BASIC_FETCH_FUNCS(reg) -/* No string on the register */ -#define fetch_reg_string NULL -#define fetch_reg_string_size NULL - -#define DEFINE_FETCH_stack(type) \ -static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ - void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ - (unsigned int)((unsigned long)offset)); \ -} -DEFINE_BASIC_FETCH_FUNCS(stack) -/* No string on the stack entry */ -#define fetch_stack_string NULL -#define fetch_stack_string_size NULL - -#define DEFINE_FETCH_retval(type) \ -static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ - void *dummy, void *dest) \ -{ \ - *(type *)dest = (type)regs_return_value(regs); \ -} -DEFINE_BASIC_FETCH_FUNCS(retval) -/* No string on the retval */ -#define fetch_retval_string NULL -#define fetch_retval_string_size NULL - -#define DEFINE_FETCH_memory(type) \ -static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ - void *addr, void *dest) \ -{ \ - type retval; \ - if (probe_kernel_address(addr, retval)) \ - *(type *)dest = 0; \ - else \ - *(type *)dest = retval; \ -} -DEFINE_BASIC_FETCH_FUNCS(memory) -/* - * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max - * length and relative data location. - */ -static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) -{ - long ret; - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); - u8 *src = addr; - mm_segment_t old_fs = get_fs(); - if (!maxlen) - return; - /* - * Try to get string again, since the string can be changed while - * probing. - */ - set_fs(KERNEL_DS); - pagefault_disable(); - do - ret = __copy_from_user_inatomic(dst++, src++, 1); - while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); - dst[-1] = '\0'; - pagefault_enable(); - set_fs(old_fs); - - if (ret < 0) { /* Failed to fetch string */ - ((u8 *)get_rloc_data(dest))[0] = '\0'; - *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); - } else - *(u32 *)dest = make_data_rloc(src - (u8 *)addr, - get_rloc_offs(*(u32 *)dest)); -} -/* Return the length of string -- including null terminal byte */ -static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) -{ - int ret, len = 0; - u8 c; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - pagefault_disable(); - do { - ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); - len++; - } while (c && ret == 0 && len < MAX_STRING_SIZE); - pagefault_enable(); - set_fs(old_fs); - - if (ret < 0) /* Failed to check the length */ - *(u32 *)dest = 0; - else - *(u32 *)dest = len; -} - -/* Memory fetching by symbol */ -struct symbol_cache { - char *symbol; - long offset; - unsigned long addr; -}; - -static unsigned long update_symbol_cache(struct symbol_cache *sc) -{ - sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); - if (sc->addr) - sc->addr += sc->offset; - return sc->addr; -} - -static void free_symbol_cache(struct symbol_cache *sc) -{ - kfree(sc->symbol); - kfree(sc); -} - -static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) -{ - struct symbol_cache *sc; - - if (!sym || strlen(sym) == 0) - return NULL; - sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); - if (!sc) - return NULL; - - sc->symbol = kstrdup(sym, GFP_KERNEL); - if (!sc->symbol) { - kfree(sc); - return NULL; - } - sc->offset = offset; - update_symbol_cache(sc); - return sc; -} - -#define DEFINE_FETCH_symbol(type) \ -static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ - void *data, void *dest) \ -{ \ - struct symbol_cache *sc = data; \ - if (sc->addr) \ - fetch_memory_##type(regs, (void *)sc->addr, dest); \ - else \ - *(type *)dest = 0; \ -} -DEFINE_BASIC_FETCH_FUNCS(symbol) -DEFINE_FETCH_symbol(string) -DEFINE_FETCH_symbol(string_size) - -/* Dereference memory access function */ -struct deref_fetch_param { - struct fetch_param orig; - long offset; -}; - -#define DEFINE_FETCH_deref(type) \ -static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ - void *data, void *dest) \ -{ \ - struct deref_fetch_param *dprm = data; \ - unsigned long addr; \ - call_fetch(&dprm->orig, regs, &addr); \ - if (addr) { \ - addr += dprm->offset; \ - fetch_memory_##type(regs, (void *)addr, dest); \ - } else \ - *(type *)dest = 0; \ -} -DEFINE_BASIC_FETCH_FUNCS(deref) -DEFINE_FETCH_deref(string) -DEFINE_FETCH_deref(string_size) - -static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) -{ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - update_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - update_symbol_cache(data->orig.data); -} - -static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) -{ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - free_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - free_symbol_cache(data->orig.data); - kfree(data); -} - -/* Bitfield fetch function */ -struct bitfield_fetch_param { - struct fetch_param orig; - unsigned char hi_shift; - unsigned char low_shift; -}; +#include "trace_probe.h" -#define DEFINE_FETCH_bitfield(type) \ -static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ - void *data, void *dest) \ -{ \ - struct bitfield_fetch_param *bprm = data; \ - type buf = 0; \ - call_fetch(&bprm->orig, regs, &buf); \ - if (buf) { \ - buf <<= bprm->hi_shift; \ - buf >>= bprm->low_shift; \ - } \ - *(type *)dest = buf; \ -} -DEFINE_BASIC_FETCH_FUNCS(bitfield) -#define fetch_bitfield_string NULL -#define fetch_bitfield_string_size NULL - -static __kprobes void -update_bitfield_fetch_param(struct bitfield_fetch_param *data) -{ - /* - * Don't check the bitfield itself, because this must be the - * last fetch function. - */ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - update_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - update_symbol_cache(data->orig.data); -} - -static __kprobes void -free_bitfield_fetch_param(struct bitfield_fetch_param *data) -{ - /* - * Don't check the bitfield itself, because this must be the - * last fetch function. - */ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - free_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - free_symbol_cache(data->orig.data); - kfree(data); -} - -/* Default (unsigned long) fetch type */ -#define __DEFAULT_FETCH_TYPE(t) u##t -#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) -#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) -#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) - -/* Fetch types */ -enum { - FETCH_MTD_reg = 0, - FETCH_MTD_stack, - FETCH_MTD_retval, - FETCH_MTD_memory, - FETCH_MTD_symbol, - FETCH_MTD_deref, - FETCH_MTD_bitfield, - FETCH_MTD_END, -}; - -#define ASSIGN_FETCH_FUNC(method, type) \ - [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) - -#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ - {.name = _name, \ - .size = _size, \ - .is_signed = sign, \ - .print = PRINT_TYPE_FUNC_NAME(ptype), \ - .fmt = PRINT_TYPE_FMT_NAME(ptype), \ - .fmttype = _fmttype, \ - .fetch = { \ -ASSIGN_FETCH_FUNC(reg, ftype), \ -ASSIGN_FETCH_FUNC(stack, ftype), \ -ASSIGN_FETCH_FUNC(retval, ftype), \ -ASSIGN_FETCH_FUNC(memory, ftype), \ -ASSIGN_FETCH_FUNC(symbol, ftype), \ -ASSIGN_FETCH_FUNC(deref, ftype), \ -ASSIGN_FETCH_FUNC(bitfield, ftype), \ - } \ - } - -#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ - __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) - -#define FETCH_TYPE_STRING 0 -#define FETCH_TYPE_STRSIZE 1 - -/* Fetch type information table */ -static const struct fetch_type { - const char *name; /* Name of type */ - size_t size; /* Byte size of type */ - int is_signed; /* Signed flag */ - print_type_func_t print; /* Print functions */ - const char *fmt; /* Fromat string */ - const char *fmttype; /* Name in format file */ - /* Fetch functions */ - fetch_func_t fetch[FETCH_MTD_END]; -} fetch_type_table[] = { - /* Special types */ - [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, - sizeof(u32), 1, "__data_loc char[]"), - [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, - string_size, sizeof(u32), 0, "u32"), - /* Basic types */ - ASSIGN_FETCH_TYPE(u8, u8, 0), - ASSIGN_FETCH_TYPE(u16, u16, 0), - ASSIGN_FETCH_TYPE(u32, u32, 0), - ASSIGN_FETCH_TYPE(u64, u64, 0), - ASSIGN_FETCH_TYPE(s8, u8, 1), - ASSIGN_FETCH_TYPE(s16, u16, 1), - ASSIGN_FETCH_TYPE(s32, u32, 1), - ASSIGN_FETCH_TYPE(s64, u64, 1), -}; - -static const struct fetch_type *find_fetch_type(const char *type) -{ - int i; - - if (!type) - type = DEFAULT_FETCH_TYPE_STR; - - /* Special case: bitfield */ - if (*type == 'b') { - unsigned long bs; - type = strchr(type, '/'); - if (!type) - goto fail; - type++; - if (strict_strtoul(type, 0, &bs)) - goto fail; - switch (bs) { - case 8: - return find_fetch_type("u8"); - case 16: - return find_fetch_type("u16"); - case 32: - return find_fetch_type("u32"); - case 64: - return find_fetch_type("u64"); - default: - goto fail; - } - } - - for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) - if (strcmp(type, fetch_type_table[i].name) == 0) - return &fetch_type_table[i]; -fail: - return NULL; -} - -/* Special function : only accept unsigned long */ -static __kprobes void fetch_stack_address(struct pt_regs *regs, - void *dummy, void *dest) -{ - *(unsigned long *)dest = kernel_stack_pointer(regs); -} - -static fetch_func_t get_fetch_size_function(const struct fetch_type *type, - fetch_func_t orig_fn) -{ - int i; - - if (type != &fetch_type_table[FETCH_TYPE_STRING]) - return NULL; /* Only string type needs size function */ - for (i = 0; i < FETCH_MTD_END; i++) - if (type->fetch[i] == orig_fn) - return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; - - WARN_ON(1); /* This should not happen */ - return NULL; -} +#define KPROBE_EVENT_SYSTEM "kprobes" /** * Kprobe event core functions */ -struct probe_arg { - struct fetch_param fetch; - struct fetch_param fetch_size; - unsigned int offset; /* Offset from argument entry */ - const char *name; /* Name of this argument */ - const char *comm; /* Command of this argument */ - const struct fetch_type *type; /* Type of this argument */ -}; - -/* Flags for trace_probe */ -#define TP_FLAG_TRACE 1 -#define TP_FLAG_PROFILE 2 -#define TP_FLAG_REGISTERED 4 - struct trace_probe { struct list_head list; struct kretprobe rp; /* Use rp.kp for kprobe use */ @@ -631,18 +99,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); -/* Check the name is good for event/group/fields */ -static int is_good_name(const char *name) -{ - if (!isalpha(*name) && *name != '_') - return 0; - while (*++name != '\0') { - if (!isalpha(*name) && !isdigit(*name) && *name != '_') - return 0; - } - return 1; -} - /* * Allocate new trace_probe and initialize it (including kprobes). */ @@ -651,7 +107,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, void *addr, const char *symbol, unsigned long offs, - int nargs, int is_return) + int nargs, bool is_return) { struct trace_probe *tp; int ret = -ENOMEM; @@ -702,34 +158,12 @@ error: return ERR_PTR(ret); } -static void update_probe_arg(struct probe_arg *arg) -{ - if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) - update_bitfield_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) - update_deref_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) - update_symbol_cache(arg->fetch.data); -} - -static void free_probe_arg(struct probe_arg *arg) -{ - if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) - free_bitfield_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) - free_deref_fetch_param(arg->fetch.data); - else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) - free_symbol_cache(arg->fetch.data); - kfree(arg->name); - kfree(arg->comm); -} - static void free_trace_probe(struct trace_probe *tp) { int i; for (i = 0; i < tp->nr_args; i++) - free_probe_arg(&tp->args[i]); + traceprobe_free_probe_arg(&tp->args[i]); kfree(tp->call.class->system); kfree(tp->call.name); @@ -787,7 +221,7 @@ static int __register_trace_probe(struct trace_probe *tp) return -EINVAL; for (i = 0; i < tp->nr_args; i++) - update_probe_arg(&tp->args[i]); + traceprobe_update_arg(&tp->args[i]); /* Set/clear disabled flag according to tp->flag */ if (trace_probe_is_enabled(tp)) @@ -919,227 +353,6 @@ static struct notifier_block trace_probe_module_nb = { .priority = 1 /* Invoked after kprobe module callback */ }; -/* Split symbol and offset. */ -static int split_symbol_offset(char *symbol, unsigned long *offset) -{ - char *tmp; - int ret; - - if (!offset) - return -EINVAL; - - tmp = strchr(symbol, '+'); - if (tmp) { - /* skip sign because strict_strtol doesn't accept '+' */ - ret = strict_strtoul(tmp + 1, 0, offset); - if (ret) - return ret; - *tmp = '\0'; - } else - *offset = 0; - return 0; -} - -#define PARAM_MAX_ARGS 16 -#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) - -static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_param *f, int is_return) -{ - int ret = 0; - unsigned long param; - - if (strcmp(arg, "retval") == 0) { - if (is_return) - f->fn = t->fetch[FETCH_MTD_retval]; - else - ret = -EINVAL; - } else if (strncmp(arg, "stack", 5) == 0) { - if (arg[5] == '\0') { - if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) - f->fn = fetch_stack_address; - else - ret = -EINVAL; - } else if (isdigit(arg[5])) { - ret = strict_strtoul(arg + 5, 10, ¶m); - if (ret || param > PARAM_MAX_STACK) - ret = -EINVAL; - else { - f->fn = t->fetch[FETCH_MTD_stack]; - f->data = (void *)param; - } - } else - ret = -EINVAL; - } else - ret = -EINVAL; - return ret; -} - -/* Recursive argument parser */ -static int __parse_probe_arg(char *arg, const struct fetch_type *t, - struct fetch_param *f, int is_return) -{ - int ret = 0; - unsigned long param; - long offset; - char *tmp; - - switch (arg[0]) { - case '$': - ret = parse_probe_vars(arg + 1, t, f, is_return); - break; - case '%': /* named register */ - ret = regs_query_register_offset(arg + 1); - if (ret >= 0) { - f->fn = t->fetch[FETCH_MTD_reg]; - f->data = (void *)(unsigned long)ret; - ret = 0; - } - break; - case '@': /* memory or symbol */ - if (isdigit(arg[1])) { - ret = strict_strtoul(arg + 1, 0, ¶m); - if (ret) - break; - f->fn = t->fetch[FETCH_MTD_memory]; - f->data = (void *)param; - } else { - ret = split_symbol_offset(arg + 1, &offset); - if (ret) - break; - f->data = alloc_symbol_cache(arg + 1, offset); - if (f->data) - f->fn = t->fetch[FETCH_MTD_symbol]; - } - break; - case '+': /* deref memory */ - arg++; /* Skip '+', because strict_strtol() rejects it. */ - case '-': - tmp = strchr(arg, '('); - if (!tmp) - break; - *tmp = '\0'; - ret = strict_strtol(arg, 0, &offset); - if (ret) - break; - arg = tmp + 1; - tmp = strrchr(arg, ')'); - if (tmp) { - struct deref_fetch_param *dprm; - const struct fetch_type *t2 = find_fetch_type(NULL); - *tmp = '\0'; - dprm = kzalloc(sizeof(struct deref_fetch_param), - GFP_KERNEL); - if (!dprm) - return -ENOMEM; - dprm->offset = offset; - ret = __parse_probe_arg(arg, t2, &dprm->orig, - is_return); - if (ret) - kfree(dprm); - else { - f->fn = t->fetch[FETCH_MTD_deref]; - f->data = (void *)dprm; - } - } - break; - } - if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ - pr_info("%s type has no corresponding fetch method.\n", - t->name); - ret = -EINVAL; - } - return ret; -} - -#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) - -/* Bitfield type needs to be parsed into a fetch function */ -static int __parse_bitfield_probe_arg(const char *bf, - const struct fetch_type *t, - struct fetch_param *f) -{ - struct bitfield_fetch_param *bprm; - unsigned long bw, bo; - char *tail; - - if (*bf != 'b') - return 0; - - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - return -ENOMEM; - bprm->orig = *f; - f->fn = t->fetch[FETCH_MTD_bitfield]; - f->data = (void *)bprm; - - bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ - if (bw == 0 || *tail != '@') - return -EINVAL; - - bf = tail + 1; - bo = simple_strtoul(bf, &tail, 0); - if (tail == bf || *tail != '/') - return -EINVAL; - - bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); - bprm->low_shift = bprm->hi_shift + bo; - return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; -} - -/* String length checking wrapper */ -static int parse_probe_arg(char *arg, struct trace_probe *tp, - struct probe_arg *parg, int is_return) -{ - const char *t; - int ret; - - if (strlen(arg) > MAX_ARGSTR_LEN) { - pr_info("Argument is too long.: %s\n", arg); - return -ENOSPC; - } - parg->comm = kstrdup(arg, GFP_KERNEL); - if (!parg->comm) { - pr_info("Failed to allocate memory for command '%s'.\n", arg); - return -ENOMEM; - } - t = strchr(parg->comm, ':'); - if (t) { - arg[t - parg->comm] = '\0'; - t++; - } - parg->type = find_fetch_type(t); - if (!parg->type) { - pr_info("Unsupported type: %s\n", t); - return -EINVAL; - } - parg->offset = tp->size; - tp->size += parg->type->size; - ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); - if (ret >= 0 && t != NULL) - ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); - if (ret >= 0) { - parg->fetch_size.fn = get_fetch_size_function(parg->type, - parg->fetch.fn); - parg->fetch_size.data = parg->fetch.data; - } - return ret; -} - -/* Return 1 if name is reserved or already used by another argument */ -static int conflict_field_name(const char *name, - struct probe_arg *args, int narg) -{ - int i; - for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) - if (strcmp(reserved_field_names[i], name) == 0) - return 1; - for (i = 0; i < narg; i++) - if (strcmp(args[i].name, name) == 0) - return 1; - return 0; -} - static int create_trace_probe(int argc, char **argv) { /* @@ -1162,7 +375,7 @@ static int create_trace_probe(int argc, char **argv) */ struct trace_probe *tp; int i, ret = 0; - int is_return = 0, is_delete = 0; + bool is_return = false, is_delete = false; char *symbol = NULL, *event = NULL, *group = NULL; char *arg; unsigned long offset = 0; @@ -1171,11 +384,11 @@ static int create_trace_probe(int argc, char **argv) /* argc must be >= 1 */ if (argv[0][0] == 'p') - is_return = 0; + is_return = false; else if (argv[0][0] == 'r') - is_return = 1; + is_return = true; else if (argv[0][0] == '-') - is_delete = 1; + is_delete = true; else { pr_info("Probe definition must be started with 'p', 'r' or" " '-'.\n"); @@ -1240,7 +453,7 @@ static int create_trace_probe(int argc, char **argv) /* a symbol specified */ symbol = argv[1]; /* TODO: support .init module functions */ - ret = split_symbol_offset(symbol, &offset); + ret = traceprobe_split_symbol_offset(symbol, &offset); if (ret) { pr_info("Failed to parse symbol.\n"); return ret; @@ -1302,7 +515,8 @@ static int create_trace_probe(int argc, char **argv) goto error; } - if (conflict_field_name(tp->args[i].name, tp->args, i)) { + if (traceprobe_conflict_field_name(tp->args[i].name, + tp->args, i)) { pr_info("Argument[%d] name '%s' conflicts with " "another field.\n", i, argv[i]); ret = -EINVAL; @@ -1310,7 +524,8 @@ static int create_trace_probe(int argc, char **argv) } /* Parse fetch argument */ - ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); + ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], + is_return, true); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1412,70 +627,11 @@ static int probes_open(struct inode *inode, struct file *file) return seq_open(file, &probes_seq_op); } -static int command_trace_probe(const char *buf) -{ - char **argv; - int argc = 0, ret = 0; - - argv = argv_split(GFP_KERNEL, buf, &argc); - if (!argv) - return -ENOMEM; - - if (argc) - ret = create_trace_probe(argc, argv); - - argv_free(argv); - return ret; -} - -#define WRITE_BUFSIZE 4096 - static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - char *kbuf, *tmp; - int ret; - size_t done; - size_t size; - - kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - - ret = done = 0; - while (done < count) { - size = count - done; - if (size >= WRITE_BUFSIZE) - size = WRITE_BUFSIZE - 1; - if (copy_from_user(kbuf, buffer + done, size)) { - ret = -EFAULT; - goto out; - } - kbuf[size] = '\0'; - tmp = strchr(kbuf, '\n'); - if (tmp) { - *tmp = '\0'; - size = tmp - kbuf + 1; - } else if (done + size < count) { - pr_warning("Line length is too long: " - "Should be less than %d.", WRITE_BUFSIZE); - ret = -EINVAL; - goto out; - } - done += size; - /* Remove comments */ - tmp = strchr(kbuf, '#'); - if (tmp) - *tmp = '\0'; - - ret = command_trace_probe(kbuf); - if (ret) - goto out; - } - ret = done; -out: - kfree(kbuf); - return ret; + return traceprobe_probes_write(file, buffer, count, ppos, + create_trace_probe); } static const struct file_operations kprobe_events_ops = { @@ -1711,16 +867,6 @@ partial: return TRACE_TYPE_PARTIAL_LINE; } -#undef DEFINE_FIELD -#define DEFINE_FIELD(type, item, name, is_signed) \ - do { \ - ret = trace_define_field(event_call, #type, name, \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed, \ - FILTER_OTHER); \ - if (ret) \ - return ret; \ - } while (0) static int kprobe_event_define_fields(struct ftrace_event_call *event_call) { @@ -2051,8 +1197,9 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); - ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " - "$stack $stack0 +0($stack)"); + ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target " + "$stack $stack0 +0($stack)", + create_trace_probe); if (WARN_ON_ONCE(ret)) { pr_warning("error on probing function entry.\n"); warn++; @@ -2066,8 +1213,8 @@ static __init int kprobe_trace_self_tests_init(void) enable_trace_probe(tp, TP_FLAG_TRACE); } - ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " - "$retval"); + ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " + "$retval", create_trace_probe); if (WARN_ON_ONCE(ret)) { pr_warning("error on probing function return.\n"); warn++; @@ -2101,13 +1248,13 @@ static __init int kprobe_trace_self_tests_init(void) } else disable_trace_probe(tp, TP_FLAG_TRACE); - ret = command_trace_probe("-:testprobe"); + ret = traceprobe_command("-:testprobe", create_trace_probe); if (WARN_ON_ONCE(ret)) { pr_warning("error on deleting a probe.\n"); warn++; } - ret = command_trace_probe("-:testprobe2"); + ret = traceprobe_command("-:testprobe2", create_trace_probe); if (WARN_ON_ONCE(ret)) { pr_warning("error on deleting a probe.\n"); warn++; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c new file mode 100644 index 00000000000..daa9980153a --- /dev/null +++ b/kernel/trace/trace_probe.c @@ -0,0 +1,839 @@ +/* + * Common code for probe-based Dynamic events. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This code was copied from kernel/trace/trace_kprobe.c written by + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Updates to make this generic: + * Copyright (C) IBM Corporation, 2010-2011 + * Author: Srikar Dronamraju + */ + +#include "trace_probe.h" + +const char *reserved_field_names[] = { + "common_type", + "common_flags", + "common_preempt_count", + "common_pid", + "common_tgid", + FIELD_STRING_IP, + FIELD_STRING_RETIP, + FIELD_STRING_FUNC, +}; + +/* Printing function type */ +#define PRINT_TYPE_FUNC_NAME(type) print_type_##type +#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type + +/* Printing in basic type function template */ +#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ +static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ + const char *name, \ + void *data, void *ent)\ +{ \ + return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ +} \ +static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; + +DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) +DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) +DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) +DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) +DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) +DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) + +static inline void *get_rloc_data(u32 *dl) +{ + return (u8 *)dl + get_rloc_offs(*dl); +} + +/* For data_loc conversion */ +static inline void *get_loc_data(u32 *dl, void *ent) +{ + return (u8 *)ent + get_rloc_offs(*dl); +} + +/* For defining macros, define string/string_size types */ +typedef u32 string; +typedef u32 string_size; + +/* Print type function for string type */ +static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, + const char *name, + void *data, void *ent) +{ + int len = *(u32 *)data >> 16; + + if (!len) + return trace_seq_printf(s, " %s=(fault)", name); + else + return trace_seq_printf(s, " %s=\"%s\"", name, + (const char *)get_loc_data(data, ent)); +} + +static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; + +#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type +/* + * Define macro for basic types - we don't need to define s* types, because + * we have to care only about bitwidth at recording time. + */ +#define DEFINE_BASIC_FETCH_FUNCS(method) \ +DEFINE_FETCH_##method(u8) \ +DEFINE_FETCH_##method(u16) \ +DEFINE_FETCH_##method(u32) \ +DEFINE_FETCH_##method(u64) + +#define CHECK_FETCH_FUNCS(method, fn) \ + (((FETCH_FUNC_NAME(method, u8) == fn) || \ + (FETCH_FUNC_NAME(method, u16) == fn) || \ + (FETCH_FUNC_NAME(method, u32) == fn) || \ + (FETCH_FUNC_NAME(method, u64) == fn) || \ + (FETCH_FUNC_NAME(method, string) == fn) || \ + (FETCH_FUNC_NAME(method, string_size) == fn)) \ + && (fn != NULL)) + +/* Data fetch function templates */ +#define DEFINE_FETCH_reg(type) \ +static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ + void *offset, void *dest) \ +{ \ + *(type *)dest = (type)regs_get_register(regs, \ + (unsigned int)((unsigned long)offset)); \ +} +DEFINE_BASIC_FETCH_FUNCS(reg) +/* No string on the register */ +#define fetch_reg_string NULL +#define fetch_reg_string_size NULL + +#define DEFINE_FETCH_stack(type) \ +static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ + void *offset, void *dest) \ +{ \ + *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ + (unsigned int)((unsigned long)offset)); \ +} +DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string NULL +#define fetch_stack_string_size NULL + +#define DEFINE_FETCH_retval(type) \ +static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ + void *dummy, void *dest) \ +{ \ + *(type *)dest = (type)regs_return_value(regs); \ +} +DEFINE_BASIC_FETCH_FUNCS(retval) +/* No string on the retval */ +#define fetch_retval_string NULL +#define fetch_retval_string_size NULL + +#define DEFINE_FETCH_memory(type) \ +static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ + void *addr, void *dest) \ +{ \ + type retval; \ + if (probe_kernel_address(addr, retval)) \ + *(type *)dest = 0; \ + else \ + *(type *)dest = retval; \ +} +DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. + */ +static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, + void *addr, void *dest) +{ + long ret; + int maxlen = get_rloc_len(*(u32 *)dest); + u8 *dst = get_rloc_data(dest); + u8 *src = addr; + mm_segment_t old_fs = get_fs(); + + if (!maxlen) + return; + + /* + * Try to get string again, since the string can be changed while + * probing. + */ + set_fs(KERNEL_DS); + pagefault_disable(); + + do + ret = __copy_from_user_inatomic(dst++, src++, 1); + while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); + + dst[-1] = '\0'; + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) { /* Failed to fetch string */ + ((u8 *)get_rloc_data(dest))[0] = '\0'; + *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); + } else { + *(u32 *)dest = make_data_rloc(src - (u8 *)addr, + get_rloc_offs(*(u32 *)dest)); + } +} + +/* Return the length of string -- including null terminal byte */ +static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, + void *addr, void *dest) +{ + mm_segment_t old_fs; + int ret, len = 0; + u8 c; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + pagefault_disable(); + + do { + ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); + len++; + } while (c && ret == 0 && len < MAX_STRING_SIZE); + + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) /* Failed to check the length */ + *(u32 *)dest = 0; + else + *(u32 *)dest = len; +} + +/* Memory fetching by symbol */ +struct symbol_cache { + char *symbol; + long offset; + unsigned long addr; +}; + +static unsigned long update_symbol_cache(struct symbol_cache *sc) +{ + sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); + + if (sc->addr) + sc->addr += sc->offset; + + return sc->addr; +} + +static void free_symbol_cache(struct symbol_cache *sc) +{ + kfree(sc->symbol); + kfree(sc); +} + +static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) +{ + struct symbol_cache *sc; + + if (!sym || strlen(sym) == 0) + return NULL; + + sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); + if (!sc) + return NULL; + + sc->symbol = kstrdup(sym, GFP_KERNEL); + if (!sc->symbol) { + kfree(sc); + return NULL; + } + sc->offset = offset; + update_symbol_cache(sc); + + return sc; +} + +#define DEFINE_FETCH_symbol(type) \ +static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct symbol_cache *sc = data; \ + if (sc->addr) \ + fetch_memory_##type(regs, (void *)sc->addr, dest); \ + else \ + *(type *)dest = 0; \ +} +DEFINE_BASIC_FETCH_FUNCS(symbol) +DEFINE_FETCH_symbol(string) +DEFINE_FETCH_symbol(string_size) + +/* Dereference memory access function */ +struct deref_fetch_param { + struct fetch_param orig; + long offset; +}; + +#define DEFINE_FETCH_deref(type) \ +static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct deref_fetch_param *dprm = data; \ + unsigned long addr; \ + call_fetch(&dprm->orig, regs, &addr); \ + if (addr) { \ + addr += dprm->offset; \ + fetch_memory_##type(regs, (void *)addr, dest); \ + } else \ + *(type *)dest = 0; \ +} +DEFINE_BASIC_FETCH_FUNCS(deref) +DEFINE_FETCH_deref(string) +DEFINE_FETCH_deref(string_size) + +static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) +{ + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + update_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) + update_symbol_cache(data->orig.data); +} + +static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) +{ + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + free_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) + free_symbol_cache(data->orig.data); + kfree(data); +} + +/* Bitfield fetch function */ +struct bitfield_fetch_param { + struct fetch_param orig; + unsigned char hi_shift; + unsigned char low_shift; +}; + +#define DEFINE_FETCH_bitfield(type) \ +static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct bitfield_fetch_param *bprm = data; \ + type buf = 0; \ + call_fetch(&bprm->orig, regs, &buf); \ + if (buf) { \ + buf <<= bprm->hi_shift; \ + buf >>= bprm->low_shift; \ + } \ + *(type *)dest = buf; \ +} + +DEFINE_BASIC_FETCH_FUNCS(bitfield) +#define fetch_bitfield_string NULL +#define fetch_bitfield_string_size NULL + +static __kprobes void +update_bitfield_fetch_param(struct bitfield_fetch_param *data) +{ + /* + * Don't check the bitfield itself, because this must be the + * last fetch function. + */ + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + update_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) + update_symbol_cache(data->orig.data); +} + +static __kprobes void +free_bitfield_fetch_param(struct bitfield_fetch_param *data) +{ + /* + * Don't check the bitfield itself, because this must be the + * last fetch function. + */ + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + free_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) + free_symbol_cache(data->orig.data); + + kfree(data); +} + +/* Default (unsigned long) fetch type */ +#define __DEFAULT_FETCH_TYPE(t) u##t +#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) +#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) +#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) + +#define ASSIGN_FETCH_FUNC(method, type) \ + [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) + +#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ + {.name = _name, \ + .size = _size, \ + .is_signed = sign, \ + .print = PRINT_TYPE_FUNC_NAME(ptype), \ + .fmt = PRINT_TYPE_FMT_NAME(ptype), \ + .fmttype = _fmttype, \ + .fetch = { \ +ASSIGN_FETCH_FUNC(reg, ftype), \ +ASSIGN_FETCH_FUNC(stack, ftype), \ +ASSIGN_FETCH_FUNC(retval, ftype), \ +ASSIGN_FETCH_FUNC(memory, ftype), \ +ASSIGN_FETCH_FUNC(symbol, ftype), \ +ASSIGN_FETCH_FUNC(deref, ftype), \ +ASSIGN_FETCH_FUNC(bitfield, ftype), \ + } \ + } + +#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ + __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) + +#define FETCH_TYPE_STRING 0 +#define FETCH_TYPE_STRSIZE 1 + +/* Fetch type information table */ +static const struct fetch_type fetch_type_table[] = { + /* Special types */ + [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, + sizeof(u32), 1, "__data_loc char[]"), + [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, + string_size, sizeof(u32), 0, "u32"), + /* Basic types */ + ASSIGN_FETCH_TYPE(u8, u8, 0), + ASSIGN_FETCH_TYPE(u16, u16, 0), + ASSIGN_FETCH_TYPE(u32, u32, 0), + ASSIGN_FETCH_TYPE(u64, u64, 0), + ASSIGN_FETCH_TYPE(s8, u8, 1), + ASSIGN_FETCH_TYPE(s16, u16, 1), + ASSIGN_FETCH_TYPE(s32, u32, 1), + ASSIGN_FETCH_TYPE(s64, u64, 1), +}; + +static const struct fetch_type *find_fetch_type(const char *type) +{ + int i; + + if (!type) + type = DEFAULT_FETCH_TYPE_STR; + + /* Special case: bitfield */ + if (*type == 'b') { + unsigned long bs; + + type = strchr(type, '/'); + if (!type) + goto fail; + + type++; + if (strict_strtoul(type, 0, &bs)) + goto fail; + + switch (bs) { + case 8: + return find_fetch_type("u8"); + case 16: + return find_fetch_type("u16"); + case 32: + return find_fetch_type("u32"); + case 64: + return find_fetch_type("u64"); + default: + goto fail; + } + } + + for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) + if (strcmp(type, fetch_type_table[i].name) == 0) + return &fetch_type_table[i]; + +fail: + return NULL; +} + +/* Special function : only accept unsigned long */ +static __kprobes void fetch_stack_address(struct pt_regs *regs, + void *dummy, void *dest) +{ + *(unsigned long *)dest = kernel_stack_pointer(regs); +} + +static fetch_func_t get_fetch_size_function(const struct fetch_type *type, + fetch_func_t orig_fn) +{ + int i; + + if (type != &fetch_type_table[FETCH_TYPE_STRING]) + return NULL; /* Only string type needs size function */ + + for (i = 0; i < FETCH_MTD_END; i++) + if (type->fetch[i] == orig_fn) + return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; + + WARN_ON(1); /* This should not happen */ + + return NULL; +} + +/* Split symbol and offset. */ +int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) +{ + char *tmp; + int ret; + + if (!offset) + return -EINVAL; + + tmp = strchr(symbol, '+'); + if (tmp) { + /* skip sign because strict_strtol doesn't accept '+' */ + ret = strict_strtoul(tmp + 1, 0, offset); + if (ret) + return ret; + + *tmp = '\0'; + } else + *offset = 0; + + return 0; +} + +#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) + +static int parse_probe_vars(char *arg, const struct fetch_type *t, + struct fetch_param *f, bool is_return) +{ + int ret = 0; + unsigned long param; + + if (strcmp(arg, "retval") == 0) { + if (is_return) + f->fn = t->fetch[FETCH_MTD_retval]; + else + ret = -EINVAL; + } else if (strncmp(arg, "stack", 5) == 0) { + if (arg[5] == '\0') { + if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) + f->fn = fetch_stack_address; + else + ret = -EINVAL; + } else if (isdigit(arg[5])) { + ret = strict_strtoul(arg + 5, 10, ¶m); + if (ret || param > PARAM_MAX_STACK) + ret = -EINVAL; + else { + f->fn = t->fetch[FETCH_MTD_stack]; + f->data = (void *)param; + } + } else + ret = -EINVAL; + } else + ret = -EINVAL; + + return ret; +} + +/* Recursive argument parser */ +static int parse_probe_arg(char *arg, const struct fetch_type *t, + struct fetch_param *f, bool is_return, bool is_kprobe) +{ + unsigned long param; + long offset; + char *tmp; + int ret; + + ret = 0; + + /* Until uprobe_events supports only reg arguments */ + if (!is_kprobe && arg[0] != '%') + return -EINVAL; + + switch (arg[0]) { + case '$': + ret = parse_probe_vars(arg + 1, t, f, is_return); + break; + + case '%': /* named register */ + ret = regs_query_register_offset(arg + 1); + if (ret >= 0) { + f->fn = t->fetch[FETCH_MTD_reg]; + f->data = (void *)(unsigned long)ret; + ret = 0; + } + break; + + case '@': /* memory or symbol */ + if (isdigit(arg[1])) { + ret = strict_strtoul(arg + 1, 0, ¶m); + if (ret) + break; + + f->fn = t->fetch[FETCH_MTD_memory]; + f->data = (void *)param; + } else { + ret = traceprobe_split_symbol_offset(arg + 1, &offset); + if (ret) + break; + + f->data = alloc_symbol_cache(arg + 1, offset); + if (f->data) + f->fn = t->fetch[FETCH_MTD_symbol]; + } + break; + + case '+': /* deref memory */ + arg++; /* Skip '+', because strict_strtol() rejects it. */ + case '-': + tmp = strchr(arg, '('); + if (!tmp) + break; + + *tmp = '\0'; + ret = strict_strtol(arg, 0, &offset); + + if (ret) + break; + + arg = tmp + 1; + tmp = strrchr(arg, ')'); + + if (tmp) { + struct deref_fetch_param *dprm; + const struct fetch_type *t2; + + t2 = find_fetch_type(NULL); + *tmp = '\0'; + dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL); + + if (!dprm) + return -ENOMEM; + + dprm->offset = offset; + ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, + is_kprobe); + if (ret) + kfree(dprm); + else { + f->fn = t->fetch[FETCH_MTD_deref]; + f->data = (void *)dprm; + } + } + break; + } + if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ + pr_info("%s type has no corresponding fetch method.\n", t->name); + ret = -EINVAL; + } + + return ret; +} + +#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) + +/* Bitfield type needs to be parsed into a fetch function */ +static int __parse_bitfield_probe_arg(const char *bf, + const struct fetch_type *t, + struct fetch_param *f) +{ + struct bitfield_fetch_param *bprm; + unsigned long bw, bo; + char *tail; + + if (*bf != 'b') + return 0; + + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) + return -ENOMEM; + + bprm->orig = *f; + f->fn = t->fetch[FETCH_MTD_bitfield]; + f->data = (void *)bprm; + bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ + + if (bw == 0 || *tail != '@') + return -EINVAL; + + bf = tail + 1; + bo = simple_strtoul(bf, &tail, 0); + + if (tail == bf || *tail != '/') + return -EINVAL; + + bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); + bprm->low_shift = bprm->hi_shift + bo; + + return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; +} + +/* String length checking wrapper */ +int traceprobe_parse_probe_arg(char *arg, ssize_t *size, + struct probe_arg *parg, bool is_return, bool is_kprobe) +{ + const char *t; + int ret; + + if (strlen(arg) > MAX_ARGSTR_LEN) { + pr_info("Argument is too long.: %s\n", arg); + return -ENOSPC; + } + parg->comm = kstrdup(arg, GFP_KERNEL); + if (!parg->comm) { + pr_info("Failed to allocate memory for command '%s'.\n", arg); + return -ENOMEM; + } + t = strchr(parg->comm, ':'); + if (t) { + arg[t - parg->comm] = '\0'; + t++; + } + parg->type = find_fetch_type(t); + if (!parg->type) { + pr_info("Unsupported type: %s\n", t); + return -EINVAL; + } + parg->offset = *size; + *size += parg->type->size; + ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe); + + if (ret >= 0 && t != NULL) + ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); + + if (ret >= 0) { + parg->fetch_size.fn = get_fetch_size_function(parg->type, + parg->fetch.fn); + parg->fetch_size.data = parg->fetch.data; + } + + return ret; +} + +/* Return 1 if name is reserved or already used by another argument */ +int traceprobe_conflict_field_name(const char *name, + struct probe_arg *args, int narg) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) + if (strcmp(reserved_field_names[i], name) == 0) + return 1; + + for (i = 0; i < narg; i++) + if (strcmp(args[i].name, name) == 0) + return 1; + + return 0; +} + +void traceprobe_update_arg(struct probe_arg *arg) +{ + if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) + update_bitfield_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) + update_deref_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) + update_symbol_cache(arg->fetch.data); +} + +void traceprobe_free_probe_arg(struct probe_arg *arg) +{ + if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) + free_bitfield_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) + free_deref_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) + free_symbol_cache(arg->fetch.data); + + kfree(arg->name); + kfree(arg->comm); +} + +int traceprobe_command(const char *buf, int (*createfn)(int, char **)) +{ + char **argv; + int argc, ret; + + argc = 0; + ret = 0; + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, argv); + + argv_free(argv); + + return ret; +} + +#define WRITE_BUFSIZE 4096 + +ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos, + int (*createfn)(int, char **)) +{ + char *kbuf, *tmp; + int ret = 0; + size_t done = 0; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + while (done < count) { + size = count - done; + + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + tmp = strchr(kbuf, '\n'); + + if (tmp) { + *tmp = '\0'; + size = tmp - kbuf + 1; + } else if (done + size < count) { + pr_warning("Line length is too long: " + "Should be less than %d.", WRITE_BUFSIZE); + ret = -EINVAL; + goto out; + } + done += size; + /* Remove comments */ + tmp = strchr(kbuf, '#'); + + if (tmp) + *tmp = '\0'; + + ret = traceprobe_command(kbuf, createfn); + if (ret) + goto out; + } + ret = done; + +out: + kfree(kbuf); + + return ret; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h new file mode 100644 index 00000000000..93370867781 --- /dev/null +++ b/kernel/trace/trace_probe.h @@ -0,0 +1,161 @@ +/* + * Common header file for probe-based Dynamic events. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This code was copied from kernel/trace/trace_kprobe.h written by + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Updates to make this generic: + * Copyright (C) IBM Corporation, 2010-2011 + * Author: Srikar Dronamraju + */ + +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/debugfs.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/ptrace.h> +#include <linux/perf_event.h> +#include <linux/kprobes.h> +#include <linux/stringify.h> +#include <linux/limits.h> +#include <linux/uaccess.h> +#include <asm/bitsperlong.h> + +#include "trace.h" +#include "trace_output.h" + +#define MAX_TRACE_ARGS 128 +#define MAX_ARGSTR_LEN 63 +#define MAX_EVENT_NAME_LEN 64 +#define MAX_STRING_SIZE PATH_MAX + +/* Reserved field names */ +#define FIELD_STRING_IP "__probe_ip" +#define FIELD_STRING_RETIP "__probe_ret_ip" +#define FIELD_STRING_FUNC "__probe_func" + +#undef DEFINE_FIELD +#define DEFINE_FIELD(type, item, name, is_signed) \ + do { \ + ret = trace_define_field(event_call, #type, name, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed, \ + FILTER_OTHER); \ + if (ret) \ + return ret; \ + } while (0) + + +/* Flags for trace_probe */ +#define TP_FLAG_TRACE 1 +#define TP_FLAG_PROFILE 2 +#define TP_FLAG_REGISTERED 4 +#define TP_FLAG_UPROBE 8 + + +/* data_rloc: data relative location, compatible with u32 */ +#define make_data_rloc(len, roffs) \ + (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) +#define get_rloc_len(dl) ((u32)(dl) >> 16) +#define get_rloc_offs(dl) ((u32)(dl) & 0xffff) + +/* + * Convert data_rloc to data_loc: + * data_rloc stores the offset from data_rloc itself, but data_loc + * stores the offset from event entry. + */ +#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) + +/* Data fetch function type */ +typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); +/* Printing function type */ +typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *); + +/* Fetch types */ +enum { + FETCH_MTD_reg = 0, + FETCH_MTD_stack, + FETCH_MTD_retval, + FETCH_MTD_memory, + FETCH_MTD_symbol, + FETCH_MTD_deref, + FETCH_MTD_bitfield, + FETCH_MTD_END, +}; + +/* Fetch type information table */ +struct fetch_type { + const char *name; /* Name of type */ + size_t size; /* Byte size of type */ + int is_signed; /* Signed flag */ + print_type_func_t print; /* Print functions */ + const char *fmt; /* Fromat string */ + const char *fmttype; /* Name in format file */ + /* Fetch functions */ + fetch_func_t fetch[FETCH_MTD_END]; +}; + +struct fetch_param { + fetch_func_t fn; + void *data; +}; + +struct probe_arg { + struct fetch_param fetch; + struct fetch_param fetch_size; + unsigned int offset; /* Offset from argument entry */ + const char *name; /* Name of this argument */ + const char *comm; /* Command of this argument */ + const struct fetch_type *type; /* Type of this argument */ +}; + +static inline __kprobes void call_fetch(struct fetch_param *fprm, + struct pt_regs *regs, void *dest) +{ + return fprm->fn(regs, fprm->data, dest); +} + +/* Check the name is good for event/group/fields */ +static inline int is_good_name(const char *name) +{ + if (!isalpha(*name) && *name != '_') + return 0; + while (*++name != '\0') { + if (!isalpha(*name) && !isdigit(*name) && *name != '_') + return 0; + } + return 1; +} + +extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, + struct probe_arg *parg, bool is_return, bool is_kprobe); + +extern int traceprobe_conflict_field_name(const char *name, + struct probe_arg *args, int narg); + +extern void traceprobe_update_arg(struct probe_arg *arg); +extern void traceprobe_free_probe_arg(struct probe_arg *arg); + +extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset); + +extern ssize_t traceprobe_probes_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos, + int (*createfn)(int, char**)); + +extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c new file mode 100644 index 00000000000..2b36ac68549 --- /dev/null +++ b/kernel/trace/trace_uprobe.c @@ -0,0 +1,788 @@ +/* + * uprobes-based tracing events + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (C) IBM Corporation, 2010-2012 + * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/uprobes.h> +#include <linux/namei.h> + +#include "trace_probe.h" + +#define UPROBE_EVENT_SYSTEM "uprobes" + +/* + * uprobe event core functions + */ +struct trace_uprobe; +struct uprobe_trace_consumer { + struct uprobe_consumer cons; + struct trace_uprobe *tu; +}; + +struct trace_uprobe { + struct list_head list; + struct ftrace_event_class class; + struct ftrace_event_call call; + struct uprobe_trace_consumer *consumer; + struct inode *inode; + char *filename; + unsigned long offset; + unsigned long nhit; + unsigned int flags; /* For TP_FLAG_* */ + ssize_t size; /* trace entry size */ + unsigned int nr_args; + struct probe_arg args[]; +}; + +#define SIZEOF_TRACE_UPROBE(n) \ + (offsetof(struct trace_uprobe, args) + \ + (sizeof(struct probe_arg) * (n))) + +static int register_uprobe_event(struct trace_uprobe *tu); +static void unregister_uprobe_event(struct trace_uprobe *tu); + +static DEFINE_MUTEX(uprobe_lock); +static LIST_HEAD(uprobe_list); + +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); + +/* + * Allocate new trace_uprobe and initialize it (including uprobes). + */ +static struct trace_uprobe * +alloc_trace_uprobe(const char *group, const char *event, int nargs) +{ + struct trace_uprobe *tu; + + if (!event || !is_good_name(event)) + return ERR_PTR(-EINVAL); + + if (!group || !is_good_name(group)) + return ERR_PTR(-EINVAL); + + tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL); + if (!tu) + return ERR_PTR(-ENOMEM); + + tu->call.class = &tu->class; + tu->call.name = kstrdup(event, GFP_KERNEL); + if (!tu->call.name) + goto error; + + tu->class.system = kstrdup(group, GFP_KERNEL); + if (!tu->class.system) + goto error; + + INIT_LIST_HEAD(&tu->list); + return tu; + +error: + kfree(tu->call.name); + kfree(tu); + + return ERR_PTR(-ENOMEM); +} + +static void free_trace_uprobe(struct trace_uprobe *tu) +{ + int i; + + for (i = 0; i < tu->nr_args; i++) + traceprobe_free_probe_arg(&tu->args[i]); + + iput(tu->inode); + kfree(tu->call.class->system); + kfree(tu->call.name); + kfree(tu->filename); + kfree(tu); +} + +static struct trace_uprobe *find_probe_event(const char *event, const char *group) +{ + struct trace_uprobe *tu; + + list_for_each_entry(tu, &uprobe_list, list) + if (strcmp(tu->call.name, event) == 0 && + strcmp(tu->call.class->system, group) == 0) + return tu; + + return NULL; +} + +/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */ +static void unregister_trace_uprobe(struct trace_uprobe *tu) +{ + list_del(&tu->list); + unregister_uprobe_event(tu); + free_trace_uprobe(tu); +} + +/* Register a trace_uprobe and probe_event */ +static int register_trace_uprobe(struct trace_uprobe *tu) +{ + struct trace_uprobe *old_tp; + int ret; + + mutex_lock(&uprobe_lock); + + /* register as an event */ + old_tp = find_probe_event(tu->call.name, tu->call.class->system); + if (old_tp) + /* delete old event */ + unregister_trace_uprobe(old_tp); + + ret = register_uprobe_event(tu); + if (ret) { + pr_warning("Failed to register probe event(%d)\n", ret); + goto end; + } + + list_add_tail(&tu->list, &uprobe_list); + +end: + mutex_unlock(&uprobe_lock); + + return ret; +} + +/* + * Argument syntax: + * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] + * + * - Remove uprobe: -:[GRP/]EVENT + */ +static int create_trace_uprobe(int argc, char **argv) +{ + struct trace_uprobe *tu; + struct inode *inode; + char *arg, *event, *group, *filename; + char buf[MAX_EVENT_NAME_LEN]; + struct path path; + unsigned long offset; + bool is_delete; + int i, ret; + + inode = NULL; + ret = 0; + is_delete = false; + event = NULL; + group = NULL; + + /* argc must be >= 1 */ + if (argv[0][0] == '-') + is_delete = true; + else if (argv[0][0] != 'p') { + pr_info("Probe definition must be started with 'p', 'r' or" " '-'.\n"); + return -EINVAL; + } + + if (argv[0][1] == ':') { + event = &argv[0][2]; + arg = strchr(event, '/'); + + if (arg) { + group = event; + event = arg + 1; + event[-1] = '\0'; + + if (strlen(group) == 0) { + pr_info("Group name is not specified\n"); + return -EINVAL; + } + } + if (strlen(event) == 0) { + pr_info("Event name is not specified\n"); + return -EINVAL; + } + } + if (!group) + group = UPROBE_EVENT_SYSTEM; + + if (is_delete) { + if (!event) { + pr_info("Delete command needs an event name.\n"); + return -EINVAL; + } + mutex_lock(&uprobe_lock); + tu = find_probe_event(event, group); + + if (!tu) { + mutex_unlock(&uprobe_lock); + pr_info("Event %s/%s doesn't exist.\n", group, event); + return -ENOENT; + } + /* delete an event */ + unregister_trace_uprobe(tu); + mutex_unlock(&uprobe_lock); + return 0; + } + + if (argc < 2) { + pr_info("Probe point is not specified.\n"); + return -EINVAL; + } + if (isdigit(argv[1][0])) { + pr_info("probe point must be have a filename.\n"); + return -EINVAL; + } + arg = strchr(argv[1], ':'); + if (!arg) + goto fail_address_parse; + + *arg++ = '\0'; + filename = argv[1]; + ret = kern_path(filename, LOOKUP_FOLLOW, &path); + if (ret) + goto fail_address_parse; + + ret = strict_strtoul(arg, 0, &offset); + if (ret) + goto fail_address_parse; + + inode = igrab(path.dentry->d_inode); + + argc -= 2; + argv += 2; + + /* setup a probe */ + if (!event) { + char *tail = strrchr(filename, '/'); + char *ptr; + + ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL); + if (!ptr) { + ret = -ENOMEM; + goto fail_address_parse; + } + + tail = ptr; + ptr = strpbrk(tail, ".-_"); + if (ptr) + *ptr = '\0'; + + snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset); + event = buf; + kfree(tail); + } + + tu = alloc_trace_uprobe(group, event, argc); + if (IS_ERR(tu)) { + pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); + ret = PTR_ERR(tu); + goto fail_address_parse; + } + tu->offset = offset; + tu->inode = inode; + tu->filename = kstrdup(filename, GFP_KERNEL); + + if (!tu->filename) { + pr_info("Failed to allocate filename.\n"); + ret = -ENOMEM; + goto error; + } + + /* parse arguments */ + ret = 0; + for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + /* Increment count for freeing args in error case */ + tu->nr_args++; + + /* Parse argument name */ + arg = strchr(argv[i], '='); + if (arg) { + *arg++ = '\0'; + tu->args[i].name = kstrdup(argv[i], GFP_KERNEL); + } else { + arg = argv[i]; + /* If argument name is omitted, set "argN" */ + snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); + tu->args[i].name = kstrdup(buf, GFP_KERNEL); + } + + if (!tu->args[i].name) { + pr_info("Failed to allocate argument[%d] name.\n", i); + ret = -ENOMEM; + goto error; + } + + if (!is_good_name(tu->args[i].name)) { + pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name); + ret = -EINVAL; + goto error; + } + + if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) { + pr_info("Argument[%d] name '%s' conflicts with " + "another field.\n", i, argv[i]); + ret = -EINVAL; + goto error; + } + + /* Parse fetch argument */ + ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false); + if (ret) { + pr_info("Parse error at argument[%d]. (%d)\n", i, ret); + goto error; + } + } + + ret = register_trace_uprobe(tu); + if (ret) + goto error; + return 0; + +error: + free_trace_uprobe(tu); + return ret; + +fail_address_parse: + if (inode) + iput(inode); + + pr_info("Failed to parse address.\n"); + + return ret; +} + +static void cleanup_all_probes(void) +{ + struct trace_uprobe *tu; + + mutex_lock(&uprobe_lock); + while (!list_empty(&uprobe_list)) { + tu = list_entry(uprobe_list.next, struct trace_uprobe, list); + unregister_trace_uprobe(tu); + } + mutex_unlock(&uprobe_lock); +} + +/* Probes listing interfaces */ +static void *probes_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&uprobe_lock); + return seq_list_start(&uprobe_list, *pos); +} + +static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &uprobe_list, pos); +} + +static void probes_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&uprobe_lock); +} + +static int probes_seq_show(struct seq_file *m, void *v) +{ + struct trace_uprobe *tu = v; + int i; + + seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name); + seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); + + for (i = 0; i < tu->nr_args; i++) + seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm); + + seq_printf(m, "\n"); + return 0; +} + +static const struct seq_operations probes_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_seq_show +}; + +static int probes_open(struct inode *inode, struct file *file) +{ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) + cleanup_all_probes(); + + return seq_open(file, &probes_seq_op); +} + +static ssize_t probes_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe); +} + +static const struct file_operations uprobe_events_ops = { + .owner = THIS_MODULE, + .open = probes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = probes_write, +}; + +/* Probes profiling interfaces */ +static int probes_profile_seq_show(struct seq_file *m, void *v) +{ + struct trace_uprobe *tu = v; + + seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit); + return 0; +} + +static const struct seq_operations profile_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_profile_seq_show +}; + +static int profile_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &profile_seq_op); +} + +static const struct file_operations uprobe_profile_ops = { + .owner = THIS_MODULE, + .open = profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* uprobe handler */ +static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +{ + struct uprobe_trace_entry_head *entry; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + u8 *data; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &tu->call; + + tu->nhit++; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = sizeof(*entry) + tu->size; + + event = trace_current_buffer_lock_reserve(&buffer, call->event.type, + size, irq_flags, pc); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); + data = (u8 *)&entry[1]; + for (i = 0; i < tu->nr_args; i++) + call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); + + if (!filter_current_check_discard(buffer, call, entry, event)) + trace_buffer_unlock_commit(buffer, event, irq_flags, pc); +} + +/* Event entry printers */ +static enum print_line_t +print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) +{ + struct uprobe_trace_entry_head *field; + struct trace_seq *s = &iter->seq; + struct trace_uprobe *tu; + u8 *data; + int i; + + field = (struct uprobe_trace_entry_head *)iter->ent; + tu = container_of(event, struct trace_uprobe, call.event); + + if (!trace_seq_printf(s, "%s: (", tu->call.name)) + goto partial; + + if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ")")) + goto partial; + + data = (u8 *)&field[1]; + for (i = 0; i < tu->nr_args; i++) { + if (!tu->args[i].type->print(s, tu->args[i].name, + data + tu->args[i].offset, field)) + goto partial; + } + + if (trace_seq_puts(s, "\n")) + return TRACE_TYPE_HANDLED; + +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static int probe_event_enable(struct trace_uprobe *tu, int flag) +{ + struct uprobe_trace_consumer *utc; + int ret = 0; + + if (!tu->inode || tu->consumer) + return -EINTR; + + utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); + if (!utc) + return -EINTR; + + utc->cons.handler = uprobe_dispatcher; + utc->cons.filter = NULL; + ret = uprobe_register(tu->inode, tu->offset, &utc->cons); + if (ret) { + kfree(utc); + return ret; + } + + tu->flags |= flag; + utc->tu = tu; + tu->consumer = utc; + + return 0; +} + +static void probe_event_disable(struct trace_uprobe *tu, int flag) +{ + if (!tu->inode || !tu->consumer) + return; + + uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); + tu->flags &= ~flag; + kfree(tu->consumer); + tu->consumer = NULL; +} + +static int uprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct uprobe_trace_entry_head field; + struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data; + + DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); + /* Set argument names as fields */ + for (i = 0; i < tu->nr_args; i++) { + ret = trace_define_field(event_call, tu->args[i].type->fmttype, + tu->args[i].name, + sizeof(field) + tu->args[i].offset, + tu->args[i].type->size, + tu->args[i].type->is_signed, + FILTER_OTHER); + + if (ret) + return ret; + } + return 0; +} + +#define LEN_OR_ZERO (len ? len - pos : 0) +static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) +{ + const char *fmt, *arg; + int i; + int pos = 0; + + fmt = "(%lx)"; + arg = "REC->" FIELD_STRING_IP; + + /* When len=0, we just calculate the needed length */ + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); + + for (i = 0; i < tu->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", + tu->args[i].name, tu->args[i].type->fmt); + } + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); + + for (i = 0; i < tu->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", + tu->args[i].name); + } + + return pos; /* return the length of print_fmt */ +} +#undef LEN_OR_ZERO + +static int set_print_fmt(struct trace_uprobe *tu) +{ + char *print_fmt; + int len; + + /* First: called with 0 length to calculate the needed length */ + len = __set_print_fmt(tu, NULL, 0); + print_fmt = kmalloc(len + 1, GFP_KERNEL); + if (!print_fmt) + return -ENOMEM; + + /* Second: actually write the @print_fmt */ + __set_print_fmt(tu, print_fmt, len + 1); + tu->call.print_fmt = print_fmt; + + return 0; +} + +#ifdef CONFIG_PERF_EVENTS +/* uprobe profile handler */ +static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) +{ + struct ftrace_event_call *call = &tu->call; + struct uprobe_trace_entry_head *entry; + struct hlist_head *head; + u8 *data; + int size, __size, i; + int rctx; + + __size = sizeof(*entry) + tu->size; + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) + return; + + preempt_disable(); + + entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + if (!entry) + goto out; + + entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); + data = (u8 *)&entry[1]; + for (i = 0; i < tu->nr_args; i++) + call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); + + head = this_cpu_ptr(call->perf_events); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); + + out: + preempt_enable(); +} +#endif /* CONFIG_PERF_EVENTS */ + +static +int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) +{ + struct trace_uprobe *tu = (struct trace_uprobe *)event->data; + + switch (type) { + case TRACE_REG_REGISTER: + return probe_event_enable(tu, TP_FLAG_TRACE); + + case TRACE_REG_UNREGISTER: + probe_event_disable(tu, TP_FLAG_TRACE); + return 0; + +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_REGISTER: + return probe_event_enable(tu, TP_FLAG_PROFILE); + + case TRACE_REG_PERF_UNREGISTER: + probe_event_disable(tu, TP_FLAG_PROFILE); + return 0; +#endif + default: + return 0; + } + return 0; +} + +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) +{ + struct uprobe_trace_consumer *utc; + struct trace_uprobe *tu; + + utc = container_of(con, struct uprobe_trace_consumer, cons); + tu = utc->tu; + if (!tu || tu->consumer != utc) + return 0; + + if (tu->flags & TP_FLAG_TRACE) + uprobe_trace_func(tu, regs); + +#ifdef CONFIG_PERF_EVENTS + if (tu->flags & TP_FLAG_PROFILE) + uprobe_perf_func(tu, regs); +#endif + return 0; +} + +static struct trace_event_functions uprobe_funcs = { + .trace = print_uprobe_event +}; + +static int register_uprobe_event(struct trace_uprobe *tu) +{ + struct ftrace_event_call *call = &tu->call; + int ret; + + /* Initialize ftrace_event_call */ + INIT_LIST_HEAD(&call->class->fields); + call->event.funcs = &uprobe_funcs; + call->class->define_fields = uprobe_event_define_fields; + + if (set_print_fmt(tu) < 0) + return -ENOMEM; + + ret = register_ftrace_event(&call->event); + if (!ret) { + kfree(call->print_fmt); + return -ENODEV; + } + call->flags = 0; + call->class->reg = trace_uprobe_register; + call->data = tu; + ret = trace_add_event_call(call); + + if (ret) { + pr_info("Failed to register uprobe event: %s\n", call->name); + kfree(call->print_fmt); + unregister_ftrace_event(&call->event); + } + + return ret; +} + +static void unregister_uprobe_event(struct trace_uprobe *tu) +{ + /* tu->event is unregistered in trace_remove_event_call() */ + trace_remove_event_call(&tu->call); + kfree(tu->call.print_fmt); + tu->call.print_fmt = NULL; +} + +/* Make a trace interface for controling probe points */ +static __init int init_uprobe_trace(void) +{ + struct dentry *d_tracer; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + trace_create_file("uprobe_events", 0644, d_tracer, + NULL, &uprobe_events_ops); + /* Profile interface */ + trace_create_file("uprobe_profile", 0444, d_tracer, + NULL, &uprobe_profile_ops); + return 0; +} + +fs_initcall(init_uprobe_trace); diff --git a/mm/memory.c b/mm/memory.c index 6105f475fa8..bf8b4035277 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1307,6 +1307,9 @@ static void unmap_single_vma(struct mmu_gather *tlb, if (end <= vma->vm_start) return; + if (vma->vm_file) + uprobe_munmap(vma, start, end); + if (vma->vm_flags & VM_ACCOUNT) *nr_accounted += (end - start) >> PAGE_SHIFT; diff --git a/mm/mmap.c b/mm/mmap.c index 848ef52d960..b8c4072dd9c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -30,6 +30,7 @@ #include <linux/perf_event.h> #include <linux/audit.h> #include <linux/khugepaged.h> +#include <linux/uprobes.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -546,8 +547,15 @@ again: remove_next = 1 + (end > next->vm_end); if (file) { mapping = file->f_mapping; - if (!(vma->vm_flags & VM_NONLINEAR)) + if (!(vma->vm_flags & VM_NONLINEAR)) { root = &mapping->i_mmap; + uprobe_munmap(vma, vma->vm_start, vma->vm_end); + + if (adjust_next) + uprobe_munmap(next, next->vm_start, + next->vm_end); + } + mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* @@ -617,8 +625,16 @@ again: remove_next = 1 + (end > next->vm_end); if (mapping) mutex_unlock(&mapping->i_mmap_mutex); + if (root) { + uprobe_mmap(vma); + + if (adjust_next) + uprobe_mmap(next); + } + if (remove_next) { if (file) { + uprobe_munmap(next, next->vm_start, next->vm_end); fput(file); if (next->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(mm); @@ -638,6 +654,8 @@ again: remove_next = 1 + (end > next->vm_end); goto again; } } + if (insert && file) + uprobe_mmap(insert); validate_mm(mm); @@ -1371,6 +1389,11 @@ out: mm->locked_vm += (len >> PAGE_SHIFT); } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) make_pages_present(addr, addr + len); + + if (file && uprobe_mmap(vma)) + /* matching probes but cannot insert */ + goto unmap_and_free_vma; + return addr; unmap_and_free_vma: @@ -2352,6 +2375,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; + + if (vma->vm_file && uprobe_mmap(vma)) + return -EINVAL; + vma_link(mm, vma, prev, rb_link, rb_parent); return 0; } @@ -2421,6 +2448,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_pgoff = pgoff; if (new_vma->vm_file) { get_file(new_vma->vm_file); + + if (uprobe_mmap(new_vma)) + goto out_free_mempol; + if (vma->vm_flags & VM_EXECUTABLE) added_exe_file_vma(mm); } diff --git a/mm/swapfile.c b/mm/swapfile.c index fafc26d1b1d..38186d96a72 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1924,24 +1924,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are three limiting factors: 1) the number + * device. There are two limiting factors: 1) the number * of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte as defined by the - * the different architectures, and 3) the number of free bits - * in an exceptional radix_tree entry. In order to find the + * different architectures. In order to find the * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. Then the same is done for a radix_tree entry. + * swap pte. */ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))); - maxpages = swp_offset(radix_to_swp_entry( - swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; - + swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3d432068f62..052d343d43f 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -192,10 +192,10 @@ static int pack_sg_list(struct scatterlist *sg, int start, s = rest_of_page(data); if (s > count) s = count; + BUG_ON(index > limit); sg_set_buf(&sg[index++], data, s); count -= s; data += s; - BUG_ON(index > limit); } return index-start; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 3b62cf28803..faa078f74b2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -71,7 +71,9 @@ static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); - wake_up(waitq); + + if (waitq) + wake_up(waitq); } static void @@ -91,11 +93,9 @@ rpc_timeout_upcall_queue(struct work_struct *work) } dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); - if (dentry) { - rpc_purge_list(&RPC_I(dentry->d_inode)->waitq, - &free_list, destroy_msg, -ETIMEDOUT); - dput(dentry); - } + rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL, + &free_list, destroy_msg, -ETIMEDOUT); + dput(dentry); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b7210f5cc89..cb7c13fc485 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1376,7 +1376,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, sizeof(req->rq_snd_buf)); return bc_send(req); } else { - /* Nothing to do to drop request */ + /* drop request */ + xprt_free_bc_request(req); return 0; } } diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index 65d525d74c5..4e190b5950b 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -2084,7 +2084,6 @@ static int wm8904_probe(struct snd_soc_codec *codec) { struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); struct wm8904_pdata *pdata = wm8904->pdata; - u16 *reg_cache = codec->reg_cache; int ret, i; codec->cache_sync = 1; @@ -2180,14 +2179,18 @@ static int wm8904_probe(struct snd_soc_codec *codec) if (!pdata->gpio_cfg[i]) continue; - reg_cache[WM8904_GPIO_CONTROL_1 + i] - = pdata->gpio_cfg[i] & 0xffff; + regmap_update_bits(wm8904->regmap, + WM8904_GPIO_CONTROL_1 + i, + 0xffff, + pdata->gpio_cfg[i]); } /* Zero is the default value for these anyway */ for (i = 0; i < WM8904_MIC_REGS; i++) - reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i] - = pdata->mic_cfg[i]; + regmap_update_bits(wm8904->regmap, + WM8904_MIC_BIAS_CONTROL_0 + i, + 0xffff, + pdata->mic_cfg[i]); } /* Set Class W by default - this will be managed by the Class diff --git a/temp-armv5tel-kirkwood b/temp-armv5tel-kirkwood index bef477cc49d..744cd0a1f31 100644 --- a/temp-armv5tel-kirkwood +++ b/temp-armv5tel-kirkwood @@ -2446,7 +2446,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4062,6 +4062,7 @@ CONFIG_STACK_TRACER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/temp-armv7l-highbank b/temp-armv7l-highbank index 9d0cd4d791d..e366f419897 100644 --- a/temp-armv7l-highbank +++ b/temp-armv7l-highbank @@ -2446,7 +2446,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4062,6 +4062,7 @@ CONFIG_STACK_TRACER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/temp-armv7l-imx b/temp-armv7l-imx index db6ad1a7adb..4145b70d6ff 100644 --- a/temp-armv7l-imx +++ b/temp-armv7l-imx @@ -2446,7 +2446,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4062,6 +4062,7 @@ CONFIG_STACK_TRACER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/temp-armv7l-omap b/temp-armv7l-omap index 8f0dd96012d..77005ba13a8 100644 --- a/temp-armv7l-omap +++ b/temp-armv7l-omap @@ -2446,7 +2446,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4062,6 +4062,7 @@ CONFIG_FUNCTION_PROFILER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/temp-armv7l-tegra b/temp-armv7l-tegra index 85264312b46..f31dc44e885 100644 --- a/temp-armv7l-tegra +++ b/temp-armv7l-tegra @@ -2446,7 +2446,7 @@ CONFIG_DRM_NOUVEAU_DEBUG=y # CONFIG_DRM_PSB is not set CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_UDL=m +# CONFIG_DRM_UDL is not set CONFIG_DRM_VMWGFX=m CONFIG_DRM_VGEM=m @@ -4062,6 +4062,7 @@ CONFIG_STACK_TRACER=y CONFIG_KPROBES=y CONFIG_KPROBE_EVENT=y +CONFIG_UPROBE_EVENT=y # CONFIG_KPROBES_SANITY_TEST is not set CONFIG_JUMP_LABEL=y CONFIG_OPTPROBES=y diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 2780d9ce48b..b715cb71592 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -77,7 +77,8 @@ OPTIONS -F:: --funcs:: - Show available functions in given module or kernel. + Show available functions in given module or kernel. With -x/--exec, + can also list functions in a user space executable / shared library. --filter=FILTER:: (Only for --vars and --funcs) Set filter. FILTER is a combination of glob @@ -98,6 +99,15 @@ OPTIONS --max-probes:: Set the maximum number of probe points for an event. Default is 128. +-x:: +--exec=PATH:: + Specify path to the executable or shared library file for user + space tracing. Can also be used with --funcs option. + +In absence of -m/-x options, perf probe checks if the first argument after +the options is an absolute path name. If its an absolute path, perf probe +uses it as a target module/target user space binary to probe. + PROBE SYNTAX ------------ Probe points are defined by following syntax. @@ -182,6 +192,13 @@ Delete all probes on schedule(). ./perf probe --del='schedule*' +Add probes at zfree() function on /bin/zsh + + ./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree + +Add probes at malloc() function on libc + + ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc SEE ALSO -------- diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 4935c09dd5b..e215ae61b2a 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -54,6 +54,7 @@ static struct { bool show_ext_vars; bool show_funcs; bool mod_events; + bool uprobes; int nevents; struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; @@ -75,6 +76,8 @@ static int parse_probe_event(const char *str) return -1; } + pev->uprobes = params.uprobes; + /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); pr_debug("%d arguments\n", pev->nargs); @@ -82,21 +85,58 @@ static int parse_probe_event(const char *str) return ret; } +static int set_target(const char *ptr) +{ + int found = 0; + const char *buf; + + /* + * The first argument after options can be an absolute path + * to an executable / library or kernel module. + * + * TODO: Support relative path, and $PATH, $LD_LIBRARY_PATH, + * short module name. + */ + if (!params.target && ptr && *ptr == '/') { + params.target = ptr; + found = 1; + buf = ptr + (strlen(ptr) - 3); + + if (strcmp(buf, ".ko")) + params.uprobes = true; + + } + + return found; +} + static int parse_probe_event_argv(int argc, const char **argv) { - int i, len, ret; + int i, len, ret, found_target; char *buf; + found_target = set_target(argv[0]); + if (found_target && argc == 1) + return 0; + /* Bind up rest arguments */ len = 0; - for (i = 0; i < argc; i++) + for (i = 0; i < argc; i++) { + if (i == 0 && found_target) + continue; + len += strlen(argv[i]) + 1; + } buf = zalloc(len + 1); if (buf == NULL) return -ENOMEM; len = 0; - for (i = 0; i < argc; i++) + for (i = 0; i < argc; i++) { + if (i == 0 && found_target) + continue; + len += sprintf(&buf[len], "%s ", argv[i]); + } params.mod_events = true; ret = parse_probe_event(buf); free(buf); @@ -125,6 +165,28 @@ static int opt_del_probe_event(const struct option *opt __used, return 0; } +static int opt_set_target(const struct option *opt, const char *str, + int unset __used) +{ + int ret = -ENOENT; + + if (str && !params.target) { + if (!strcmp(opt->long_name, "exec")) + params.uprobes = true; +#ifdef DWARF_SUPPORT + else if (!strcmp(opt->long_name, "module")) + params.uprobes = false; +#endif + else + return ret; + + params.target = str; + ret = 0; + } + + return ret; +} + #ifdef DWARF_SUPPORT static int opt_show_lines(const struct option *opt __used, const char *str, int unset __used) @@ -246,9 +308,9 @@ static const struct option options[] = { "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, "directory", "path to kernel source"), - OPT_STRING('m', "module", ¶ms.target, - "modname|path", - "target module name (for online) or path (for offline)"), + OPT_CALLBACK('m', "module", NULL, "modname|path", + "target module name (for online) or path (for offline)", + opt_set_target), #endif OPT__DRY_RUN(&probe_event_dry_run), OPT_INTEGER('\0', "max-probes", ¶ms.max_probe_points, @@ -260,6 +322,8 @@ static const struct option options[] = { "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n" "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)", opt_set_filter), + OPT_CALLBACK('x', "exec", NULL, "executable|path", + "target executable name or path", opt_set_target), OPT_END() }; @@ -310,6 +374,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) pr_err(" Error: Don't use --list with --funcs.\n"); usage_with_options(probe_usage, options); } + if (params.uprobes) { + pr_warning(" Error: Don't use --list with --exec.\n"); + usage_with_options(probe_usage, options); + } ret = show_perf_probe_events(); if (ret < 0) pr_err(" Error: Failed to show event list. (%d)\n", @@ -333,8 +401,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (!params.filter) params.filter = strfilter__new(DEFAULT_FUNC_FILTER, NULL); - ret = show_available_funcs(params.target, - params.filter); + ret = show_available_funcs(params.target, params.filter, + params.uprobes); strfilter__delete(params.filter); if (ret < 0) pr_err(" Error: Failed to show functions." @@ -343,7 +411,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) } #ifdef DWARF_SUPPORT - if (params.show_lines) { + if (params.show_lines && !params.uprobes) { if (params.mod_events) { pr_err(" Error: Don't use --line with" " --add/--del.\n"); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8a8ee64e72d..0dda25d82d0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -44,6 +44,7 @@ #include "trace-event.h" /* For __unused */ #include "probe-event.h" #include "probe-finder.h" +#include "session.h" #define MAX_CMDLEN 256 #define MAX_PROBE_ARGS 128 @@ -70,6 +71,8 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) } static char *synthesize_perf_probe_point(struct perf_probe_point *pp); +static int convert_name_to_addr(struct perf_probe_event *pev, + const char *exec); static struct machine machine; /* Initialize symbol maps and path of vmlinux/modules */ @@ -170,6 +173,34 @@ const char *kernel_get_module_path(const char *module) return (dso) ? dso->long_name : NULL; } +static int init_user_exec(void) +{ + int ret = 0; + + symbol_conf.try_vmlinux_path = false; + symbol_conf.sort_by_name = true; + ret = symbol__init(); + + if (ret < 0) + pr_debug("Failed to init symbol map.\n"); + + return ret; +} + +static int convert_to_perf_probe_point(struct probe_trace_point *tp, + struct perf_probe_point *pp) +{ + pp->function = strdup(tp->symbol); + + if (pp->function == NULL) + return -ENOMEM; + + pp->offset = tp->offset; + pp->retprobe = tp->retprobe; + + return 0; +} + #ifdef DWARF_SUPPORT /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module) @@ -224,10 +255,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, if (ret <= 0) { pr_debug("Failed to find corresponding probes from " "debuginfo. Use kprobe event information.\n"); - pp->function = strdup(tp->symbol); - if (pp->function == NULL) - return -ENOMEM; - pp->offset = tp->offset; + return convert_to_perf_probe_point(tp, pp); } pp->retprobe = tp->retprobe; @@ -275,9 +303,20 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, int max_tevs, const char *target) { bool need_dwarf = perf_probe_event_need_dwarf(pev); - struct debuginfo *dinfo = open_debuginfo(target); + struct debuginfo *dinfo; int ntevs, ret = 0; + if (pev->uprobes) { + if (need_dwarf) { + pr_warning("Debuginfo-analysis is not yet supported" + " with -x/--exec option.\n"); + return -ENOSYS; + } + return convert_name_to_addr(pev, target); + } + + dinfo = open_debuginfo(target); + if (!dinfo) { if (need_dwarf) { pr_warning("Failed to open debuginfo file.\n"); @@ -603,23 +642,22 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, pr_err("Failed to find symbol %s in kernel.\n", tp->symbol); return -ENOENT; } - pp->function = strdup(tp->symbol); - if (pp->function == NULL) - return -ENOMEM; - pp->offset = tp->offset; - pp->retprobe = tp->retprobe; - return 0; + return convert_to_perf_probe_point(tp, pp); } static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs __unused, - int max_tevs __unused, const char *mod __unused) + int max_tevs __unused, const char *target) { if (perf_probe_event_need_dwarf(pev)) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; } + + if (pev->uprobes) + return convert_name_to_addr(pev, target); + return 0; } @@ -1341,11 +1379,18 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (buf == NULL) return NULL; - len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu", - tp->retprobe ? 'r' : 'p', - tev->group, tev->event, - tp->module ?: "", tp->module ? ":" : "", - tp->symbol, tp->offset); + if (tev->uprobes) + len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s:%s", + tp->retprobe ? 'r' : 'p', + tev->group, tev->event, + tp->module, tp->symbol); + else + len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu", + tp->retprobe ? 'r' : 'p', + tev->group, tev->event, + tp->module ?: "", tp->module ? ":" : "", + tp->symbol, tp->offset); + if (len <= 0) goto error; @@ -1364,7 +1409,7 @@ error: } static int convert_to_perf_probe_event(struct probe_trace_event *tev, - struct perf_probe_event *pev) + struct perf_probe_event *pev, bool is_kprobe) { char buf[64] = ""; int i, ret; @@ -1376,7 +1421,11 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, return -ENOMEM; /* Convert trace_point to probe_point */ - ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point); + if (is_kprobe) + ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point); + else + ret = convert_to_perf_probe_point(&tev->point, &pev->point); + if (ret < 0) return ret; @@ -1472,7 +1521,26 @@ static void clear_probe_trace_event(struct probe_trace_event *tev) memset(tev, 0, sizeof(*tev)); } -static int open_kprobe_events(bool readwrite) +static void print_warn_msg(const char *file, bool is_kprobe) +{ + + if (errno == ENOENT) { + const char *config; + + if (!is_kprobe) + config = "CONFIG_UPROBE_EVENTS"; + else + config = "CONFIG_KPROBE_EVENTS"; + + pr_warning("%s file does not exist - please rebuild kernel" + " with %s.\n", file, config); + } else + pr_warning("Failed to open %s file: %s\n", file, + strerror(errno)); +} + +static int open_probe_events(const char *trace_file, bool readwrite, + bool is_kprobe) { char buf[PATH_MAX]; const char *__debugfs; @@ -1484,27 +1552,31 @@ static int open_kprobe_events(bool readwrite) return -ENOENT; } - ret = e_snprintf(buf, PATH_MAX, "%stracing/kprobe_events", __debugfs); + ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) ret = open(buf, O_RDWR, O_APPEND); else ret = open(buf, O_RDONLY, 0); - } - if (ret < 0) { - if (errno == ENOENT) - pr_warning("kprobe_events file does not exist - please" - " rebuild kernel with CONFIG_KPROBE_EVENT.\n"); - else - pr_warning("Failed to open kprobe_events file: %s\n", - strerror(errno)); + if (ret < 0) + print_warn_msg(buf, is_kprobe); } return ret; } -/* Get raw string list of current kprobe_events */ +static int open_kprobe_events(bool readwrite) +{ + return open_probe_events("tracing/kprobe_events", readwrite, true); +} + +static int open_uprobe_events(bool readwrite) +{ + return open_probe_events("tracing/uprobe_events", readwrite, false); +} + +/* Get raw string list of current kprobe_events or uprobe_events */ static struct strlist *get_probe_trace_command_rawlist(int fd) { int ret, idx; @@ -1569,36 +1641,26 @@ static int show_perf_probe_event(struct perf_probe_event *pev) return ret; } -/* List up current perf-probe events */ -int show_perf_probe_events(void) +static int __show_perf_probe_events(int fd, bool is_kprobe) { - int fd, ret; + int ret = 0; struct probe_trace_event tev; struct perf_probe_event pev; struct strlist *rawlist; struct str_node *ent; - setup_pager(); - ret = init_vmlinux(); - if (ret < 0) - return ret; - memset(&tev, 0, sizeof(tev)); memset(&pev, 0, sizeof(pev)); - fd = open_kprobe_events(false); - if (fd < 0) - return fd; - rawlist = get_probe_trace_command_rawlist(fd); - close(fd); if (!rawlist) return -ENOENT; strlist__for_each(ent, rawlist) { ret = parse_probe_trace_command(ent->s, &tev); if (ret >= 0) { - ret = convert_to_perf_probe_event(&tev, &pev); + ret = convert_to_perf_probe_event(&tev, &pev, + is_kprobe); if (ret >= 0) ret = show_perf_probe_event(&pev); } @@ -1612,6 +1674,33 @@ int show_perf_probe_events(void) return ret; } +/* List up current perf-probe events */ +int show_perf_probe_events(void) +{ + int fd, ret; + + setup_pager(); + fd = open_kprobe_events(false); + + if (fd < 0) + return fd; + + ret = init_vmlinux(); + if (ret < 0) + return ret; + + ret = __show_perf_probe_events(fd, true); + close(fd); + + fd = open_uprobe_events(false); + if (fd >= 0) { + ret = __show_perf_probe_events(fd, false); + close(fd); + } + + return ret; +} + /* Get current perf-probe event names */ static struct strlist *get_probe_trace_event_names(int fd, bool include_group) { @@ -1717,7 +1806,11 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, const char *event, *group; struct strlist *namelist; - fd = open_kprobe_events(true); + if (pev->uprobes) + fd = open_uprobe_events(true); + else + fd = open_kprobe_events(true); + if (fd < 0) return fd; /* Get current event names */ @@ -1829,6 +1922,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, tev->point.offset = pev->point.offset; tev->point.retprobe = pev->point.retprobe; tev->nargs = pev->nargs; + tev->uprobes = pev->uprobes; + if (tev->nargs) { tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); @@ -1859,6 +1954,9 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, } } + if (pev->uprobes) + return 1; + /* Currently just checking function name from symbol map */ sym = __find_kernel_function_by_name(tev->point.symbol, NULL); if (!sym) { @@ -1894,12 +1992,18 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, int i, j, ret; struct __event_package *pkgs; + ret = 0; pkgs = zalloc(sizeof(struct __event_package) * npevs); + if (pkgs == NULL) return -ENOMEM; - /* Init vmlinux path */ - ret = init_vmlinux(); + if (!pevs->uprobes) + /* Init vmlinux path */ + ret = init_vmlinux(); + else + ret = init_user_exec(); + if (ret < 0) { free(pkgs); return ret; @@ -1971,23 +2075,15 @@ error: return ret; } -static int del_trace_probe_event(int fd, const char *group, - const char *event, struct strlist *namelist) +static int del_trace_probe_event(int fd, const char *buf, + struct strlist *namelist) { - char buf[128]; struct str_node *ent, *n; - int found = 0, ret = 0; - - ret = e_snprintf(buf, 128, "%s:%s", group, event); - if (ret < 0) { - pr_err("Failed to copy event.\n"); - return ret; - } + int ret = -1; if (strpbrk(buf, "*?")) { /* Glob-exp */ strlist__for_each_safe(ent, n, namelist) if (strglobmatch(ent->s, buf)) { - found++; ret = __del_trace_probe_event(fd, ent); if (ret < 0) break; @@ -1996,40 +2092,43 @@ static int del_trace_probe_event(int fd, const char *group, } else { ent = strlist__find(namelist, buf); if (ent) { - found++; ret = __del_trace_probe_event(fd, ent); if (ret >= 0) strlist__remove(namelist, ent); } } - if (found == 0 && ret >= 0) - pr_info("Info: Event \"%s\" does not exist.\n", buf); return ret; } int del_perf_probe_events(struct strlist *dellist) { - int fd, ret = 0; + int ret = -1, ufd = -1, kfd = -1; + char buf[128]; const char *group, *event; char *p, *str; struct str_node *ent; - struct strlist *namelist; - - fd = open_kprobe_events(true); - if (fd < 0) - return fd; + struct strlist *namelist = NULL, *unamelist = NULL; /* Get current event names */ - namelist = get_probe_trace_event_names(fd, true); - if (namelist == NULL) - return -EINVAL; + kfd = open_kprobe_events(true); + if (kfd < 0) + return kfd; + + namelist = get_probe_trace_event_names(kfd, true); + ufd = open_uprobe_events(true); + + if (ufd >= 0) + unamelist = get_probe_trace_event_names(ufd, true); + + if (namelist == NULL && unamelist == NULL) + goto error; strlist__for_each(ent, dellist) { str = strdup(ent->s); if (str == NULL) { ret = -ENOMEM; - break; + goto error; } pr_debug("Parsing: %s\n", str); p = strchr(str, ':'); @@ -2041,17 +2140,42 @@ int del_perf_probe_events(struct strlist *dellist) group = "*"; event = str; } + + ret = e_snprintf(buf, 128, "%s:%s", group, event); + if (ret < 0) { + pr_err("Failed to copy event."); + free(str); + goto error; + } + pr_debug("Group: %s, Event: %s\n", group, event); - ret = del_trace_probe_event(fd, group, event, namelist); + + if (namelist) + ret = del_trace_probe_event(kfd, buf, namelist); + + if (unamelist && ret != 0) + ret = del_trace_probe_event(ufd, buf, unamelist); + + if (ret != 0) + pr_info("Info: Event \"%s\" does not exist.\n", buf); + free(str); - if (ret < 0) - break; } - strlist__delete(namelist); - close(fd); + +error: + if (kfd >= 0) { + strlist__delete(namelist); + close(kfd); + } + + if (ufd >= 0) { + strlist__delete(unamelist); + close(ufd); + } return ret; } + /* TODO: don't use a global variable for filter ... */ static struct strfilter *available_func_filter; @@ -2068,30 +2192,152 @@ static int filter_available_functions(struct map *map __unused, return 1; } -int show_available_funcs(const char *target, struct strfilter *_filter) +static int __show_available_funcs(struct map *map) +{ + if (map__load(map, filter_available_functions)) { + pr_err("Failed to load map.\n"); + return -EINVAL; + } + if (!dso__sorted_by_name(map->dso, map->type)) + dso__sort_by_name(map->dso, map->type); + + dso__fprintf_symbols_by_name(map->dso, map->type, stdout); + return 0; +} + +static int available_kernel_funcs(const char *module) { struct map *map; int ret; - setup_pager(); - ret = init_vmlinux(); if (ret < 0) return ret; - map = kernel_get_module_map(target); + map = kernel_get_module_map(module); if (!map) { - pr_err("Failed to find %s map.\n", (target) ? : "kernel"); + pr_err("Failed to find %s map.\n", (module) ? : "kernel"); return -EINVAL; } + return __show_available_funcs(map); +} + +static int available_user_funcs(const char *target) +{ + struct map *map; + int ret; + + ret = init_user_exec(); + if (ret < 0) + return ret; + + map = dso__new_map(target); + ret = __show_available_funcs(map); + dso__delete(map->dso); + map__delete(map); + return ret; +} + +int show_available_funcs(const char *target, struct strfilter *_filter, + bool user) +{ + setup_pager(); available_func_filter = _filter; + + if (!user) + return available_kernel_funcs(target); + + return available_user_funcs(target); +} + +/* + * uprobe_events only accepts address: + * Convert function and any offset to address + */ +static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) +{ + struct perf_probe_point *pp = &pev->point; + struct symbol *sym; + struct map *map = NULL; + char *function = NULL, *name = NULL; + int ret = -EINVAL; + unsigned long long vaddr = 0; + + if (!pp->function) { + pr_warning("No function specified for uprobes"); + goto out; + } + + function = strdup(pp->function); + if (!function) { + pr_warning("Failed to allocate memory by strdup.\n"); + ret = -ENOMEM; + goto out; + } + + name = realpath(exec, NULL); + if (!name) { + pr_warning("Cannot find realpath for %s.\n", exec); + goto out; + } + map = dso__new_map(name); + if (!map) { + pr_warning("Cannot find appropriate DSO for %s.\n", exec); + goto out; + } + available_func_filter = strfilter__new(function, NULL); if (map__load(map, filter_available_functions)) { pr_err("Failed to load map.\n"); - return -EINVAL; + goto out; } - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); - dso__fprintf_symbols_by_name(map->dso, map->type, stdout); - return 0; + sym = map__find_symbol_by_name(map, function, NULL); + if (!sym) { + pr_warning("Cannot find %s in DSO %s\n", function, exec); + goto out; + } + + if (map->start > sym->start) + vaddr = map->start; + vaddr += sym->start + pp->offset + map->pgoff; + pp->offset = 0; + + if (!pev->event) { + pev->event = function; + function = NULL; + } + if (!pev->group) { + char *ptr1, *ptr2; + + pev->group = zalloc(sizeof(char *) * 64); + ptr1 = strdup(basename(exec)); + if (ptr1) { + ptr2 = strpbrk(ptr1, "-._"); + if (ptr2) + *ptr2 = '\0'; + e_snprintf(pev->group, 64, "%s_%s", PERFPROBE_GROUP, + ptr1); + free(ptr1); + } + } + free(pp->function); + pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS); + if (!pp->function) { + ret = -ENOMEM; + pr_warning("Failed to allocate memory by zalloc.\n"); + goto out; + } + e_snprintf(pp->function, MAX_PROBE_ARGS, "0x%llx", vaddr); + ret = 0; + +out: + if (map) { + dso__delete(map->dso); + map__delete(map); + } + if (function) + free(function); + if (name) + free(name); + return ret; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index a7dee835f49..f9f3de8b422 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -7,7 +7,7 @@ extern bool probe_event_dry_run; -/* kprobe-tracer tracing point */ +/* kprobe-tracer and uprobe-tracer tracing point */ struct probe_trace_point { char *symbol; /* Base symbol */ char *module; /* Module name */ @@ -21,7 +21,7 @@ struct probe_trace_arg_ref { long offset; /* Offset value */ }; -/* kprobe-tracer tracing argument */ +/* kprobe-tracer and uprobe-tracer tracing argument */ struct probe_trace_arg { char *name; /* Argument name */ char *value; /* Base value */ @@ -29,12 +29,13 @@ struct probe_trace_arg { struct probe_trace_arg_ref *ref; /* Referencing offset */ }; -/* kprobe-tracer tracing event (point + arg) */ +/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */ struct probe_trace_event { char *event; /* Event name */ char *group; /* Group name */ struct probe_trace_point point; /* Trace point */ int nargs; /* Number of args */ + bool uprobes; /* uprobes only */ struct probe_trace_arg *args; /* Arguments */ }; @@ -70,6 +71,7 @@ struct perf_probe_event { char *group; /* Group name */ struct perf_probe_point point; /* Probe point */ int nargs; /* Number of arguments */ + bool uprobes; struct perf_probe_arg *args; /* Arguments */ }; @@ -129,8 +131,8 @@ extern int show_line_range(struct line_range *lr, const char *module); extern int show_available_vars(struct perf_probe_event *pevs, int npevs, int max_probe_points, const char *module, struct strfilter *filter, bool externs); -extern int show_available_funcs(const char *module, struct strfilter *filter); - +extern int show_available_funcs(const char *module, struct strfilter *filter, + bool user); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ab9867b2b43..0ef529ec5cd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2783,3 +2783,14 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, return ret; } + +struct map *dso__new_map(const char *name) +{ + struct map *map = NULL; + struct dso *dso = dso__new(name); + + if (dso) + map = map__new2(0, dso, MAP__FUNCTION); + + return map; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ac49ef208a5..9e7742c3e9b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -237,6 +237,7 @@ void dso__set_long_name(struct dso *dso, char *name); void dso__set_build_id(struct dso *dso, void *build_id); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); +struct map *dso__new_map(const char *name); struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, diff --git a/uprobes.txt b/uprobes.txt deleted file mode 100644 index e69de29bb2d..00000000000 --- a/uprobes.txt +++ /dev/null |