/*
 * Userspace Probes (UProbes)
 * uprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2006-2008
 */
/*
 * In versions of uprobes built in the SystemTap runtime, this file
 * is #included at the end of uprobes.c.
 */
/* The include target was lost in extraction; <linux/uaccess.h> is an
   assumption, based on the copy_{from,to}_user() calls below. */
#include <linux/uaccess.h>

#ifdef CONFIG_X86_32
#define is_32bit_app(tsk) 1
#else
#define is_32bit_app(tsk) (test_tsk_thread_flag(tsk, TIF_IA32))
#endif

/* Adapted from arch/x86_64/kprobes.c */
#undef W
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
	(((b0##ULL<< 0x0)|(b1##ULL<< 0x1)|(b2##ULL<< 0x2)|(b3##ULL<< 0x3) |   \
	  (b4##ULL<< 0x4)|(b5##ULL<< 0x5)|(b6##ULL<< 0x6)|(b7##ULL<< 0x7) |   \
	  (b8##ULL<< 0x8)|(b9##ULL<< 0x9)|(ba##ULL<< 0xa)|(bb##ULL<< 0xb) |   \
	  (bc##ULL<< 0xc)|(bd##ULL<< 0xd)|(be##ULL<< 0xe)|(bf##ULL<< 0xf))    \
	 << (row % 64))

static const u64 good_insns_64[256 / 64] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	/*      -------------------------------         */
	W(0x00, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 00 */
	W(0x10, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 10 */
	W(0x20, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 20 */
	W(0x30, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0), /* 30 */
	W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
	W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
	W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
	W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
	W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
	W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
	W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
	W(0xd0, 1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1)| /* d0 */
	W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
	W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1)  /* f0 */
	/*      -------------------------------         */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
};

/* Good-instruction tables for 32-bit apps -- copied from i386 uprobes */
static const u64 good_insns_32[256 / 64] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	/*      -------------------------------         */
	W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */
	W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 10 */
	W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */
	W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */
	W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
	W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
	W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
	W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
	W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
	W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
	W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
	W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1)| /* d0 */
	W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
	W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1)  /* f0 */
	/*      -------------------------------         */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
};
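/*
 * Illustration (added sketch, not referenced by the code): each W() row
 * packs one may-probe bit per opcode into the u64 array, so each table
 * here is really a 256-bit bitmap and a lookup is a single bitmap test,
 * exactly as the validate_insn_*bits() routines below perform it:
 *
 *	u8 op = 0x50;					(push %rax)
 *	if (test_bit(op, (unsigned long *)good_insns_64))
 *		... opcode may be probed in 64-bit mode ...
 *
 * Opcode 0x50 lands in good_insns_64[1] -- the element built from rows
 * 0x40 through 0x70 -- at bit position 0x50 % 64 == 0x10.
 */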
/* Using this for both 64-bit and 32-bit apps */
static const u64 good_2byte_insns[256 / 64] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	/*      -------------------------------         */
	W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
	W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1)| /* 10 */
	W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
	W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
	W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
	W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
	W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
	W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
	W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
	W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
	W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
	W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* d0 */
	W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
	W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0)  /* f0 */
	/*      -------------------------------         */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
};

/*
 * opcodes we'll probably never support:
 * 6c-6d, e4-e5, ec-ed - in
 * 6e-6f, e6-e7, ee-ef - out
 * cc, cd - int3, int
 * cf - iret
 * d6 - illegal instruction
 * f1 - int1/icebp
 * f4 - hlt
 * fa, fb - cli, sti
 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
 *
 * invalid opcodes in 64-bit mode:
 * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
 *
 * 63 - we support this opcode in x86_64 but not in i386.
 *
 * opcodes we may need to refine support for:
 * 0f - 2-byte instructions: For many of these instructions, the validity
 *	depends on the prefix and/or the reg field.  On such instructions, we
 *	just consider the opcode combination valid if it corresponds to any
 *	valid instruction.
 * 8f - Group 1 - only reg = 0 is OK
 * c6-c7 - Group 11 - only reg = 0 is OK
 * d9-df - fpu insns with some illegal encodings
 * f2, f3 - repnz, repz prefixes.  These are also the first byte for
 *	certain floating-point instructions, such as addsd.
 * fe - Group 4 - only reg = 0 or 1 is OK
 * ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 * 0f - (floating-point?) prefetch instructions
 * 07, 17, 1f - pop es, pop ss, pop ds
 * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them
 * 67 - addr16 prefix
 * ce - into
 * f0 - lock prefix
 */

/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 *   in the different Groups and fpu instructions.
 * - Note: If we go past the first byte, do we need to verify that
 *   subsequent bytes were actually there, rather than off the last page?
 * - Be clearer about which instructions we'll never probe.
 */
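/*
 * Example of how the prefix classification below plays out (byte
 * sequences chosen purely for illustration):
 *
 *	66 89 05 ...	0x66 (operand-size) is a supported prefix: it is
 *			skipped and validation proceeds with opcode 0x89.
 *	f0 ff 05 ...	0xf0 (lock) is an unsupported prefix: the probe
 *			is rejected with -EPERM.
 */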
/*
 * Return 1 if this is a legacy instruction prefix we support, -1 if
 * it's one we don't support, or 0 if it's not a prefix at all.
 */
static inline int check_legacy_prefix(u8 byte)
{
	switch (byte) {
	case 0x26:
	case 0x2e:
	case 0x36:
	case 0x3e:
	case 0xf0:
		return -1;
	case 0x64:
	case 0x65:
	case 0x66:
	case 0x67:
	case 0xf2:
	case 0xf3:
		return 1;
	default:
		return 0;
	}
}

static void report_bad_1byte_opcode(int mode, uprobe_opcode_t op)
{
	printk(KERN_ERR "In %d-bit apps, "
		"uprobes does not currently support probing "
		"instructions whose first byte is 0x%2.2x\n", mode, op);
}

static void report_bad_2byte_opcode(uprobe_opcode_t op)
{
	printk(KERN_ERR "uprobes does not currently support probing "
		"instructions with the 2-byte opcode 0x0f 0x%2.2x\n", op);
}

static int validate_insn_32bits(struct uprobe_probept *ppt)
{
	uprobe_opcode_t *insn = ppt->insn;
	int pfx;

	/* Skip good instruction prefixes; reject "bad" ones. */
	while ((pfx = check_legacy_prefix(insn[0])) == 1)
		insn++;
	if (pfx < 0) {
		report_bad_1byte_opcode(32, insn[0]);
		return -EPERM;
	}
	if (test_bit(insn[0], (unsigned long *)good_insns_32))
		return 0;
	if (insn[0] == 0x0f) {
		if (test_bit(insn[1], (unsigned long *)good_2byte_insns))
			return 0;
		report_bad_2byte_opcode(insn[1]);
	} else
		report_bad_1byte_opcode(32, insn[0]);
	return -EPERM;
}

static int validate_insn_64bits(struct uprobe_probept *ppt)
{
	uprobe_opcode_t *insn = ppt->insn;
	int pfx;

	/* Skip good instruction prefixes; reject "bad" ones. */
	while ((pfx = check_legacy_prefix(insn[0])) == 1)
		insn++;
	if (pfx < 0) {
		report_bad_1byte_opcode(64, insn[0]);
		return -EPERM;
	}
	/* Skip REX prefix. */
	if ((insn[0] & 0xf0) == 0x40)
		insn++;
	if (test_bit(insn[0], (unsigned long *)good_insns_64))
		return 0;
	if (insn[0] == 0x0f) {
		if (test_bit(insn[1], (unsigned long *)good_2byte_insns))
			return 0;
		report_bad_2byte_opcode(insn[1]);
	} else
		report_bad_1byte_opcode(64, insn[0]);
	return -EPERM;
}

#ifdef CONFIG_X86_64
static int handle_riprel_insn(struct uprobe_probept *ppt);
#endif

static int arch_validate_probed_insn(struct uprobe_probept *ppt,
						struct task_struct *tsk)
{
	int ret;

#ifdef CONFIG_X86_64
	ppt->arch_info.flags = 0x0;
	ppt->arch_info.rip_target_address = 0x0;
#endif
	if (is_32bit_app(tsk))
		return validate_insn_32bits(ppt);
	if ((ret = validate_insn_64bits(ppt)) != 0)
		return ret;
#ifdef CONFIG_X86_64
	(void) handle_riprel_insn(ppt);
#endif
	return 0;
}
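/*
 * Worked example for the 64-bit path above (illustration only):
 * probing "movq %rax,0x11223344(%rip)", encoded 48 89 05 44 33 22 11.
 * validate_insn_64bits() finds no legacy prefix, recognizes 0x48 as a
 * REX prefix ((0x48 & 0xf0) == 0x40) and skips it, then accepts opcode
 * 0x89 via good_insns_64.  handle_riprel_insn(), defined below, then
 * spots the rip-relative modrm byte (0x05) and rewrites the copied
 * instruction before it is ever single-stepped.
 */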
#ifdef CONFIG_X86_64
/*
 * Returns 0 if the indicated instruction has no immediate operand
 * and/or can't use rip-relative addressing.  Otherwise returns
 * the size of the immediate operand in the instruction.  (Note that
 * for instructions such as "movq $7,xxxx(%rip)" the immediate-operand
 * field is 4 bytes, even though 8 bytes are stored.)
 */
static int immediate_operand_size(u8 opcode1, u8 opcode2, u8 reg,
						int operand_size_prefix)
{
	switch (opcode1) {
	case 0x6b:		/* imul immed,mem,reg */
	case 0x80:		/* Group 1 */
	case 0x83:		/* Group 1 */
	case 0xc0:		/* Group 2 */
	case 0xc1:		/* Group 2 */
	case 0xc6:		/* Group 11 */
		return 1;
	case 0x69:		/* imul immed,mem,reg */
	case 0x81:		/* Group 1 */
	case 0xc7:		/* Group 11 */
		return (operand_size_prefix ? 2 : 4);
	case 0xf6:		/* Group 3, reg field == 0 or 1 */
		return (reg > 1 ? 0 : 1);
	case 0xf7:		/* Group 3, reg field == 0 or 1 */
		if (reg > 1)
			return 0;
		return (operand_size_prefix ? 2 : 4);
	case 0x0f:		/* 2-byte opcodes */
		switch (opcode2) {
		/*
		 * Note: 0x71-73 (Groups 12-14) have immediate operands,
		 * but not memory operands.
		 */
		case 0x70:	/* pshuf* immed,mem,reg */
		case 0xa4:	/* shld immed,reg,mem */
		case 0xac:	/* shrd immed,reg,mem */
		case 0xc2:	/* cmpps or cmppd */
		case 0xc4:	/* pinsrw */
		case 0xc5:	/* pextrw */
		case 0xc6:	/* shufps or shufpd */
		case 0x0f:	/* 3DNow extensions */
			return 1;
		default:
			return 0;
		}
	}
	return 0;
}

/*
 * TODO: These tables are common for kprobes and uprobes and can be moved
 * to a common place.
 */
static const u64 onebyte_has_modrm[256 / 64] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	/*      -------------------------------         */
	W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
	W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
	W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
	W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
	W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
	W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
	W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
	W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
	W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
	W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
	W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
	W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
	W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
	W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
	W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
	W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1)  /* f0 */
	/*      -------------------------------         */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
};

static const u64 twobyte_has_modrm[256 / 64] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	/*      -------------------------------         */
	W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
	W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
	W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
	W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
	W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
	W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
	W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
	W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
	W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
	W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
	W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
	W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
	W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
	W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0)  /* ff */
	/*      -------------------------------         */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
};
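/*
 * Reminder on modrm decoding, which handle_riprel_insn() below
 * open-codes with masks:
 *
 *	mod = (modrm >> 6) & 0x3;	top 2 bits: addressing mode
 *	reg = (modrm >> 3) & 0x7;	middle 3 bits: register/opcode ext.
 *	rm  = modrm & 0x7;		bottom 3 bits: base register
 *
 * The test (modrm & 0xc7) == 0x5 is just "mod == 0 && rm == 5", the
 * unique encoding of rip-relative addressing in 64-bit mode.
 */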
/*
 * If ppt->insn doesn't use rip-relative addressing, return 0.  Otherwise,
 * rewrite the instruction so that it accesses its memory operand
 * indirectly through a scratch register.  Set flags and rip_target_address
 * in ppt->arch_info accordingly.  (The contents of the scratch register
 * will be saved before we single-step the modified instruction, and
 * restored afterward.)  Return 1.
 *
 * We do this because a rip-relative instruction can access only a
 * relatively small area (+/- 2 GB from the instruction), and the SSOL
 * area typically lies beyond that area.  At least for instructions
 * that store to memory, we can't single-step the original instruction
 * and "fix things up" later, because the misdirected store could be
 * disastrous.
 *
 * Some useful facts about rip-relative instructions:
 * - There's always a modrm byte.
 * - There's never a SIB byte.
 * - The offset is always 4 bytes.
 */
static int handle_riprel_insn(struct uprobe_probept *ppt)
{
	u8 *insn = (u8 *) ppt->insn;
	u8 opcode1, opcode2, modrm, reg;
	int need_modrm;
	int operand_size_prefix = 0;
	int immed_size, instruction_size;

	/*
	 * Skip legacy instruction prefixes.  Some of these we don't
	 * support (yet), but here we pretend to support all of them.
	 * Skip the REX prefix, if any.
	 */
	while (check_legacy_prefix(*insn)) {
		if (*insn == 0x66)
			operand_size_prefix = 1;
		insn++;
	}
	if ((*insn & 0xf0) == 0x40)
		insn++;

	opcode1 = *insn;
	if (opcode1 == 0x0f) {	/* Two-byte opcode. */
		opcode2 = *++insn;
		need_modrm = test_bit(opcode2,
					(unsigned long *)twobyte_has_modrm);
	} else {		/* One-byte opcode. */
		opcode2 = 0x0;
		need_modrm = test_bit(opcode1,
					(unsigned long *)onebyte_has_modrm);
	}

	if (!need_modrm)
		return 0;

	modrm = *++insn;
	/*
	 * For rip-relative instructions, the mod field (top 2 bits)
	 * is zero and the r/m field (bottom 3 bits) is 0x5.
	 */
	if ((modrm & 0xc7) != 0x5)
		return 0;

	/*
	 * We have a rip-relative instruction.  insn points at the
	 * modrm byte.  The next 4 bytes are the offset.  Beyond the
	 * offset, for some instructions, is the immediate operand.
	 */
	reg = (modrm >> 3) & 0x7;
	immed_size = immediate_operand_size(opcode1, opcode2, reg,
						operand_size_prefix);
	instruction_size =
		(insn - (u8 *) ppt->insn)	/* prefixes + opcodes */
		+ 1				/* modrm byte */
		+ 4				/* offset */
		+ immed_size;			/* immediate field */

#undef DEBUG_UPROBES_RIP
#ifdef DEBUG_UPROBES_RIP
	{
		int i;
		BUG_ON(instruction_size > 15);
		printk(KERN_INFO "Munging rip-relative insn:");
		for (i = 0; i < instruction_size; i++)
			printk(" %2.2x", ppt->insn[i]);
		printk("\n");
	}
#endif

	/*
	 * Convert from rip-relative addressing to indirect addressing
	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
	 */
	if (reg == 0) {
		/*
		 * The register operand (if any) is either the A register
		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
		 * REX prefix) %r8.  In any case, we know the C register
		 * is NOT the register operand, so we use %rcx (register
		 * #1) for the scratch register.
		 */
		ppt->arch_info.flags = UPFIX_RIP_RCX;
		/* Change modrm from 00 000 101 to 00 000 001. */
		*insn = 0x1;
	} else {
		/* Use %rax (register #0) for the scratch register. */
		ppt->arch_info.flags = UPFIX_RIP_RAX;
		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
		*insn = (reg << 3);
	}

	/* Target address = address of next instruction + (signed) offset */
	insn++;
	ppt->arch_info.rip_target_address = (long) ppt->vaddr +
				instruction_size + *((s32 *)insn);
	if (immed_size)
		memmove(insn, insn + 4, immed_size);

#ifdef DEBUG_UPROBES_RIP
	{
		int i;
		printk(KERN_INFO "Munged rip-relative insn: ");
		for (i = 0; i < instruction_size - 4; i++)
			printk(" %2.2x", ppt->insn[i]);
		printk("\n");
		printk(KERN_INFO "Target address = %#lx\n",
					ppt->arch_info.rip_target_address);
	}
#endif
	return 1;
}
#endif
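/*
 * Worked example of the rewrite above (addresses are illustrative):
 * "movl %eax,0x11223344(%rip)" is encoded 89 05 44 33 22 11, giving
 * instruction_size = 6 and reg = 0.  handle_riprel_insn() changes the
 * modrm byte from 0x05 to 0x01, yielding "movl %eax,(%rcx)" (89 01),
 * sets flags = UPFIX_RIP_RCX, and records
 * rip_target_address = vaddr + 6 + 0x11223344.  uprobe_pre_ssout(),
 * below, loads that address into %rcx before single-stepping.
 */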
/*
 * Get an instruction slot from the process's SSOL area, containing the
 * instruction at ppt's probepoint.  Point the rip at that slot, in
 * preparation for single-stepping out of line.
 *
 * If we're emulating a rip-relative instruction, save the contents
 * of the scratch register and store the target address in that register.
 */
static void uprobe_pre_ssout(struct uprobe_task *utask,
		struct uprobe_probept *ppt, struct pt_regs *regs)
{
	struct uprobe_ssol_slot *slot;

	slot = uprobe_get_insn_slot(ppt);
	if (!slot) {
		utask->doomed = 1;
		return;
	}
	regs->ip = (long)slot->insn;
	utask->singlestep_addr = regs->ip;

#ifdef CONFIG_X86_64
	if (ppt->arch_info.flags == UPFIX_RIP_RAX) {
		utask->arch_info.saved_scratch_register = regs->ax;
		regs->ax = ppt->arch_info.rip_target_address;
	} else if (ppt->arch_info.flags == UPFIX_RIP_RCX) {
		utask->arch_info.saved_scratch_register = regs->cx;
		regs->cx = ppt->arch_info.rip_target_address;
	}
#endif
}

/*
 * Called by uprobe_post_ssout() to adjust the return address
 * pushed by a call instruction executed out of line.
 */
static void adjust_ret_addr(unsigned long rsp, long correction,
		struct uprobe_task *utask)
{
	unsigned long nleft;

	if (is_32bit_app(current)) {
		s32 ra;
		nleft = copy_from_user(&ra, (const void __user *) rsp, 4);
		if (unlikely(nleft != 0))
			goto fail;
		ra += (s32) correction;
		nleft = copy_to_user((void __user *) rsp, &ra, 4);
		if (unlikely(nleft != 0))
			goto fail;
	} else {
		s64 ra;
		nleft = copy_from_user(&ra, (const void __user *) rsp, 8);
		if (unlikely(nleft != 0))
			goto fail;
		ra += correction;
		nleft = copy_to_user((void __user *) rsp, &ra, 8);
		if (unlikely(nleft != 0))
			goto fail;
	}
	return;

fail:
	printk(KERN_ERR "uprobes: Failed to adjust return address after"
		" single-stepping call instruction;"
		" pid=%d, rsp=%#lx\n", current->pid, rsp);
	utask->doomed = 1;
}
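/*
 * Example of the correction applied by adjust_ret_addr() (addresses
 * made up for illustration): a 5-byte call probed at orig_ip = 0x400100
 * is single-stepped from a copy at copy_ip = 0x7f0000001000, so the CPU
 * pushes 0x7f0000001005 as the return address.  Adding
 * correction = orig_ip - copy_ip to the pushed value gives 0x400105,
 * the address following the original call, which is where the callee
 * must actually return.
 */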
/*
 * Called after single-stepping.  ppt->vaddr is the address of the
 * instruction whose first byte has been replaced by the "int3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is utask->singlestep_addr.
 *
 * This function prepares to return from the post-single-step
 * trap.  We have to fix things up as follows:
 *
 * 0) Typically, the new rip is relative to the copied instruction.  We
 * need to make it relative to the original instruction.  Exceptions are
 * return instructions and absolute or indirect jump or call instructions.
 *
 * 1) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
 *
 * 2) If the original instruction was a rip-relative instruction such as
 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
 * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
 * We need to restore the contents of the scratch register and adjust
 * the rip, keeping in mind that the instruction we executed is 4 bytes
 * shorter than the original instruction (since we squeezed out the offset
 * field).
 */
static void uprobe_post_ssout(struct uprobe_task *utask,
		struct uprobe_probept *ppt, struct pt_regs *regs)
{
	unsigned long next_ip = 0;
	unsigned long copy_ip = utask->singlestep_addr;
	unsigned long orig_ip = ppt->vaddr;
	long correction = (long) (orig_ip - copy_ip);
	uprobe_opcode_t *insn = ppt->insn;
#ifdef CONFIG_X86_64
	unsigned long flags = ppt->arch_info.flags;
#endif

	up_read(&ppt->slot->rwsem);

#ifdef CONFIG_X86_64
	if (flags & (UPFIX_RIP_RAX | UPFIX_RIP_RCX)) {
		if (flags & UPFIX_RIP_RAX)
			regs->ax = utask->arch_info.saved_scratch_register;
		else
			regs->cx = utask->arch_info.saved_scratch_register;
		/*
		 * The original instruction includes a displacement, and so
		 * is 4 bytes longer than what we've just single-stepped.
		 * Fall through to handle stuff like "jmpq *...(%rip)" and
		 * "callq *...(%rip)".
		 */
		correction += 4;
	}
#endif
	/*
	 * TODO: Move all this instruction parsing to
	 * arch_validate_probed_insn(), and store what we learn in
	 * ppt->arch_info.flags.
	 *
	 * We don't bother skipping prefixes here because none of the
	 * instructions that require special treatment (other than
	 * rip-relative instructions, handled above) involve prefixes.
	 */
	switch (*insn) {
	case 0xc3:		/* ret/lret */
	case 0xcb:
	case 0xc2:
	case 0xca:
		/* rip is correct */
		next_ip = regs->ip;
		break;
	case 0xe8:		/* call relative - Fix return addr */
		adjust_ret_addr(regs->sp, correction, utask);
		break;
	case 0x9a:		/* call absolute - Fix return addr */
		adjust_ret_addr(regs->sp, correction, utask);
		next_ip = regs->ip;
		break;
	case 0xff:
		if ((insn[1] & 0x30) == 0x10) {
			/* call absolute, indirect */
			/* Fix return addr; rip is correct. */
			next_ip = regs->ip;
			adjust_ret_addr(regs->sp, correction, utask);
		} else if ((insn[1] & 0x31) == 0x20 ||	/* jmp near, absolute indirect */
			   (insn[1] & 0x31) == 0x21) {	/* jmp far, absolute indirect */
			/* rip is correct. */
			next_ip = regs->ip;
		}
		break;
	case 0xea:		/* jmp absolute -- rip is correct */
		next_ip = regs->ip;
		break;
	default:
		break;
	}

	if (next_ip)
		regs->ip = next_ip;
	else
		regs->ip += correction;
}

/*
 * Replace the return address with the trampoline address.  Returns
 * the original return address.
 */
static unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
		struct pt_regs *regs, struct uprobe_task *utask)
{
	int nleft;
	unsigned long orig_ret_addr = 0; /* clear high bits for 32-bit apps */
	size_t rasize;

	if (is_32bit_app(current))
		rasize = 4;
	else
		rasize = 8;
	nleft = copy_from_user(&orig_ret_addr,
			       (const void __user *) regs->sp, rasize);
	if (unlikely(nleft != 0))
		return 0;

	if (orig_ret_addr == trampoline_address)
		/*
		 * There's another uretprobe on this function, and it was
		 * processed first, so the return address has already
		 * been hijacked.
		 */
		return orig_ret_addr;

	nleft = copy_to_user((void __user *) regs->sp,
			     &trampoline_address, rasize);
	if (unlikely(nleft != 0)) {
		if (nleft != rasize) {
			printk(KERN_ERR "uretprobe_entry_handler: "
				"return address partially clobbered -- "
				"pid=%d, %%sp=%#lx, %%ip=%#lx\n",
				current->pid, regs->sp, regs->ip);
			utask->doomed = 1;
		}
		/* else nothing written, so no harm */
		return 0;
	}
	return orig_ret_addr;
}
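/*
 * Illustration of arch_hijack_uret_addr() (addresses made up): on entry
 * to a uretprobed function, the word atop the user stack is the real
 * return address, say 0x400105.  The function reads that word, writes
 * trampoline_address over it, and returns 0x400105 so the uretprobe
 * machinery can restore it when the trampoline is hit.  If the word
 * already equals trampoline_address, an earlier uretprobe on the same
 * function got there first and the stack is left untouched.
 */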