summaryrefslogtreecommitdiffstats
path: root/runtime/uprobes
diff options
context:
space:
mode:
authorkenistoj <kenistoj>2007-10-22 20:12:10 +0000
committerkenistoj <kenistoj>2007-10-22 20:12:10 +0000
commitaa1547b49b569064715d939d6a9dad4cd14e708c (patch)
tree3262a6182a0be022cfc71b201354aa49b953f1ee /runtime/uprobes
parentf994dd4997067ca69b3b10ff8fa1547987aed585 (diff)
downloadsystemtap-steved-aa1547b49b569064715d939d6a9dad4cd14e708c.tar.gz
systemtap-steved-aa1547b49b569064715d939d6a9dad4cd14e708c.tar.xz
systemtap-steved-aa1547b49b569064715d939d6a9dad4cd14e708c.zip
x86_64 uprobes
Diffstat (limited to 'runtime/uprobes')
-rw-r--r--runtime/uprobes/uprobes_x86_64.c697
-rw-r--r--runtime/uprobes/uprobes_x86_64.h84
2 files changed, 781 insertions, 0 deletions
diff --git a/runtime/uprobes/uprobes_x86_64.c b/runtime/uprobes/uprobes_x86_64.c
new file mode 100644
index 00000000..23dcdadb
--- /dev/null
+++ b/runtime/uprobes/uprobes_x86_64.c
@@ -0,0 +1,697 @@
+/*
+ * Userspace Probes (UProbes)
+ * arch/x86_64/kernel/uprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006-2007
+ */
+/*
+ * In versions of uprobes built in the SystemTap runtime, this file
+ * is #included at the end of uprobes.c.
+ */
+
+#define is_32bit_app(tsk) (test_tsk_thread_flag(tsk, TIF_IA32))
+
+/* Adapted from arch/x86_64/kprobes.c */
+#undef W
+#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
+ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
+ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
+ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
+ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
+ << (row % 64))
+
+static const unsigned long good_insns_64[256 / 64] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 00 */
+ W(0x10, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 10 */
+ W(0x20, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 20 */
+ W(0x30, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0), /* 30 */
+ W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
+ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
+ W(0x60, 0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0)| /* 60 */
+ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
+ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
+ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
+ W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
+ W(0xd0, 1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1)| /* d0 */
+ W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
+ W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+/* Good-instruction tables for 32-bit apps -- copied from i386 uprobes */
+
+static const unsigned long good_insns_32[256 / 64] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */
+ W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 10 */
+ W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */
+ W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
+ W(0x60, 1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,0)| /* 60 */
+ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
+ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
+ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
+ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
+ W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1)| /* d0 */
+ W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
+ W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+/* Using this for both 64-bit and 32-bit apps */
+static const unsigned long good_2byte_insns[256 / 64] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
+ W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1)| /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
+ W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
+ W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
+ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
+ W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
+ W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
+ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
+ W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* d0 */
+ W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
+ W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+/*
+ * opcodes we'll probably never support:
+ * 63 - arpl
+ * 6c-6d, e4-e5, ec-ed - in
+ * 6e-6f, e6-e7, ee-ef - out
+ * cc, cd - int3, int
+ * cf - iret
+ * d6 - illegal instruction
+ * f1 - int1/icebp
+ * f4 - hlt
+ * fa, fb - cli, sti
+ * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
+ *
+ * invalid opcodes in 64-bit mode:
+ * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5
+ *
+ * opcodes we may need to refine support for:
+ * 0f - 2-byte instructions: For many of these instructions, the validity
+ * depends on the prefix and/or the reg field. On such instructions, we
+ * just consider the opcode combination valid if it corresponds to any
+ * valid instruction.
+ * 8f - Group 1 - only reg = 0 is OK
+ * c6-c7 - Group 11 - only reg = 0 is OK
+ * d9-df - fpu insns with some illegal encodings
+ * f2, f3 - repnz, repz prefixes. These are also the first byte for
+ * certain floating-point instructions, such as addsd.
+ * fe - Group 4 - only reg = 0 or 1 is OK
+ * ff - Group 5 - only reg = 0-6 is OK
+ *
+ * others -- Do we need to support these?
+ * 0f - (floating-point?) prefetch instructions
+ * 07, 17, 1f - pop es, pop ss, pop ds
+ * 26, 2e, 36, 3e, 64, 65 - es:, cs:, ss:, ds:, fs:, gs: segment prefixes
+ * 67 - addr16 prefix
+ * 9b - wait/fwait
+ * ce - into
+ * f0 - lock prefix
+ */
+
+/*
+ * TODO:
+ * - Where necessary, examine the modrm byte and allow only valid instructions
+ * in the different Groups and fpu instructions.
+ * - Note: If we go past the first byte, do we need to verify that
+ * subsequent bytes were actually there, rather than off the last page?
+ * - Be clearer about which instructions we'll never probe.
+ */
+
+/*
+ * Return 1 if this is a legacy instruction prefix we support, -1 if
+ * it's one we don't support, or 0 if it's not a prefix at all.
+ */
+static inline int check_legacy_prefix(u8 byte)
+{
+ switch (byte) {
+ case 0x26:
+ case 0x2e:
+ case 0x36:
+ case 0x3e:
+ case 0x64:
+ case 0x65:
+ case 0xf0:
+ return -1;
+ case 0x66:
+ case 0x67:
+ case 0xf2:
+ case 0xf3:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void report_bad_1byte_opcode(int mode, uprobe_opcode_t op)
+{
+ printk(KERN_ERR "In %d-bit apps, "
+ "uprobes does not currently support probing "
+ "instructions whose first byte is 0x%2.2x\n", mode, op);
+}
+
+static void report_bad_2byte_opcode(uprobe_opcode_t op)
+{
+ printk(KERN_ERR "uprobes does not currently support probing "
+ "instructions with the 2-byte opcode 0x0f 0x%2.2x\n", op);
+}
+
+static int validate_insn_32bits(struct uprobe_probept *ppt)
+{
+ uprobe_opcode_t *insn = ppt->insn;
+ int pfx;
+
+ /* Skip good instruction prefixes; reject "bad" ones. */
+ while ((pfx = check_legacy_prefix(insn[0])) == 1)
+ insn++;
+ if (pfx < 0) {
+ report_bad_1byte_opcode(32, insn[0]);
+ return -EPERM;
+ }
+ if (test_bit(insn[0], good_insns_32))
+ return 0;
+ if (insn[0] == 0x0f) {
+ if (test_bit(insn[1], good_2byte_insns))
+ return 0;
+ report_bad_2byte_opcode(insn[1]);
+ } else
+ report_bad_1byte_opcode(32, insn[0]);
+ return -EPERM;
+}
+
+static int validate_insn_64bits(struct uprobe_probept *ppt)
+{
+ uprobe_opcode_t *insn = ppt->insn;
+ int pfx;
+
+ /* Skip good instruction prefixes; reject "bad" ones. */
+ while ((pfx = check_legacy_prefix(insn[0])) == 1)
+ insn++;
+ if (pfx < 0) {
+ report_bad_1byte_opcode(64, insn[0]);
+ return -EPERM;
+ }
+ /* Skip REX prefix. */
+ if ((insn[0] & 0xf0) == 0x40)
+ insn++;
+ if (test_bit(insn[0], good_insns_64))
+ return 0;
+ if (insn[0] == 0x0f) {
+ if (test_bit(insn[1], good_2byte_insns))
+ return 0;
+ report_bad_2byte_opcode(insn[1]);
+ } else
+ report_bad_1byte_opcode(64, insn[0]);
+ return -EPERM;
+}
+
+static int handle_riprel_insn(struct uprobe_probept *ppt);
+
+static
+int arch_validate_probed_insn(struct uprobe_probept *ppt,
+ struct task_struct *tsk)
+{
+ int ret;
+
+ ppt->arch_info.flags = 0x0;
+ ppt->arch_info.rip_target_address = 0x0;
+
+ if (is_32bit_app(tsk))
+ return validate_insn_32bits(ppt);
+ if ((ret = validate_insn_64bits(ppt)) != 0)
+ return ret;
+ (void) handle_riprel_insn(ppt);
+ return 0;
+}
+
+/*
+ * Returns 0 if the indicated instruction has no immediate operand
+ * and/or can't use rip-relative addressing. Otherwise returns
+ * the size of the immediate operand in the instruction. (Note that
+ * for instructions such as "movq $7,xxxx(%rip)" the immediate-operand
+ * field is 4 bytes, even though 8 bytes are stored.)
+ */
+static int immediate_operand_size(u8 opcode1, u8 opcode2, u8 reg,
+ int operand_size_prefix)
+{
+ switch (opcode1) {
+ case 0x6b: /* imul immed,mem,reg */
+ case 0x80: /* Group 1 */
+ case 0x83: /* Group 1 */
+ case 0xc0: /* Group 2 */
+ case 0xc1: /* Group 2 */
+ case 0xc6: /* Group 11 */
+ return 1;
+ case 0x69: /* imul immed,mem,reg */
+ case 0x81: /* Group 1 */
+ case 0xc7: /* Group 11 */
+ return (operand_size_prefix ? 2 : 4);
+ case 0xf6: /* Group 3, reg field == 0 or 1 */
+ return (reg > 1 ? 0 : 1);
+ case 0xf7: /* Group 3, reg field == 0 or 1 */
+ if (reg > 1)
+ return 0;
+ return (operand_size_prefix ? 2 : 4);
+ case 0x0f:
+ /* 2-byte opcodes */
+ switch (opcode2) {
+ /*
+ * Note: 0x71-73 (Groups 12-14) have immediate operands,
+ * but not memory operands.
+ */
+ case 0x70: /* pshuf* immed,mem,reg */
+ case 0xa4: /* shld immed,reg,mem */
+ case 0xac: /* shrd immed,reg,mem */
+ case 0xc2: /* cmpps or cmppd */
+ case 0xc4: /* pinsrw */
+ case 0xc5: /* pextrw */
+ case 0xc6: /* shufps or shufpd */
+ case 0x0f: /* 3DNow extensions */
+ return 1;
+ default:
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/*
+ * TODO: These tables are common for kprobes and uprobes and can be moved
+ * to a common place.
+ */
+static const u64 onebyte_has_modrm[256 / 64] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
+ W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
+ W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
+ W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
+ W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
+ W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
+ W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
+ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
+ W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
+ W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
+ W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
+ W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
+ W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
+ W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+static const u64 twobyte_has_modrm[256 / 64] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
+ W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
+ W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
+ W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
+ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
+ W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
+ W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
+ W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
+ W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
+ W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
+ W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
+ W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
+ W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+/*
+ * If pp->insn doesn't use rip-relative addressing, return 0. Otherwise,
+ * rewrite the instruction so that it accesses its memory operand
+ * indirectly through a scratch register. Set flags and rip_target_address
+ * in ppt->arch_info accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction, and
+ * restored afterward.) Return 1.
+ *
+ * We do this because a rip-relative instruction can access only a
+ * relatively small area (+/- 2 GB from the instruction), and the SSOL
+ * area typically lies beyond that area. At least for instructions
+ * that store to memory, we can't single-step the original instruction
+ * and "fix things up" later, because the misdirected store could be
+ * disastrous.
+ *
+ * Some useful facts about rip-relative instructions:
+ * - There's always a modrm byte.
+ * - There's never a SIB byte.
+ * - The offset is always 4 bytes.
+ */
+static int handle_riprel_insn(struct uprobe_probept *ppt)
+{
+ u8 *insn = (u8*) ppt->insn;
+ u8 opcode1, opcode2, modrm, reg;
+ int need_modrm;
+ int operand_size_prefix = 0;
+ int immed_size, instruction_size;
+
+ /*
+ * Skip legacy instruction prefixes. Some of these we don't
+ * support (yet), but here we pretend to support all of them.
+ * Skip the REX prefix, if any.
+ */
+ while (check_legacy_prefix(*insn)) {
+ if (*insn == 0x66)
+ operand_size_prefix = 1;
+ insn++;
+ }
+ if ((*insn & 0xf0) == 0x40)
+ insn++;
+
+ opcode1 = *insn;
+ if (opcode1 == 0x0f) { /* Two-byte opcode. */
+ opcode2 = *++insn;
+ need_modrm = test_bit(opcode2, twobyte_has_modrm);
+ } else { /* One-byte opcode. */
+ opcode2 = 0x0;
+ need_modrm = test_bit(opcode1, onebyte_has_modrm);
+ }
+
+ if (!need_modrm)
+ return 0;
+
+ modrm = *++insn;
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+ */
+ if ((modrm & 0xc7) != 0x5)
+ return 0;
+
+ /*
+ * We have a rip-relative instruction. insn points at the
+ * modrm byte. The next 4 bytes are the offset. Beyond the
+ * offset, for some instructions, is the immediate operand.
+ */
+ reg = (modrm >> 3) & 0x7;
+ immed_size = immediate_operand_size(opcode1, opcode2, reg,
+ operand_size_prefix);
+ instruction_size =
+ (insn - (u8*) ppt->insn) /* prefixes + opcodes */
+ + 1 /* modrm byte */
+ + 4 /* offset */
+ + immed_size; /* immediate field */
+#undef DEBUG_UPROBES_RIP
+#ifdef DEBUG_UPROBES_RIP
+{
+ int i;
+ BUG_ON(instruction_size > 15);
+ printk(KERN_INFO "Munging rip-relative insn:");
+ for (i = 0; i < instruction_size; i++)
+ printk(" %2.2x", ppt->insn[i]);
+ printk("\n");
+}
+#endif
+
+ /*
+ * Convert from rip-relative addressing to indirect addressing
+ * via a scratch register. Change the r/m field from 0x5 (%rip)
+ * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+ */
+ if (reg == 0) {
+ /*
+ * The register operand (if any) is either the A register
+ * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
+ * REX prefix) %r8. In any case, we know the C register
+ * is NOT the register operand, so we use %rcx (register
+ * #1) for the scratch register.
+ */
+ ppt->arch_info.flags = UPFIX_RIP_RCX;
+ /* Change modrm from 00 000 101 to 00 000 001. */
+ *insn = 0x1;
+ } else {
+ /* Use %rax (register #0) for the scratch register. */
+ ppt->arch_info.flags = UPFIX_RIP_RAX;
+ /* Change modrm from 00 xxx 101 to 00 xxx 000 */
+ *insn = (reg << 3);
+ }
+
+ /* Target address = address of next instruction + (signed) offset */
+ insn++;
+ ppt->arch_info.rip_target_address =
+ (long) ppt->vaddr + instruction_size + *((s32*)insn);
+ if (immed_size)
+ memmove(insn, insn+4, immed_size);
+#ifdef DEBUG_UPROBES_RIP
+{
+ int i;
+ printk(KERN_INFO "Munged rip-relative insn: ");
+ for (i = 0; i < instruction_size-4; i++)
+ printk(" %2.2x", ppt->insn[i]);
+ printk("\n");
+ printk(KERN_INFO "Target address = %#lx\n",
+ ppt->arch_info.rip_target_address);
+}
+#endif
+ return 1;
+}
+
+/*
+ * Get an instruction slot from the process's SSOL area, containing the
+ * instruction at ppt's probepoint. Point the rip at that slot, in
+ * preparation for single-stepping out of line.
+ *
+ * If we're emulating a rip-relative instruction, save the contents
+ * of the scratch register and store the target address in that register.
+ */
+static
+void uprobe_pre_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
+ struct pt_regs *regs)
+{
+ struct uprobe_ssol_slot *slot;
+
+ slot = uprobe_get_insn_slot(ppt);
+ if (!slot) {
+ utask->doomed = 1;
+ return;
+ }
+
+ regs->rip = (long)slot->insn;
+ utask->singlestep_addr = regs->rip;
+
+ if (ppt->arch_info.flags == UPFIX_RIP_RAX) {
+ utask->arch_info.saved_scratch_register = regs->rax;
+ regs->rax = ppt->arch_info.rip_target_address;
+ } else if (ppt->arch_info.flags == UPFIX_RIP_RCX) {
+ utask->arch_info.saved_scratch_register = regs->rcx;
+ regs->rcx = ppt->arch_info.rip_target_address;
+ }
+}
+
+/*
+ * Called by uprobe_post_ssout() to adjust the return address
+ * pushed by a call instruction executed out of line.
+ */
+static void adjust_ret_addr(unsigned long rsp, long correction,
+ struct uprobe_task *utask)
+{
+ unsigned long nleft;
+ if (is_32bit_app(current)) {
+ s32 ra;
+ nleft = copy_from_user(&ra, (const void __user *) rsp, 4);
+ if (unlikely(nleft != 0))
+ goto fail;
+ ra += (s32) correction;
+ nleft = copy_to_user((void __user *) rsp, &ra, 4);
+ if (unlikely(nleft != 0))
+ goto fail;
+ } else {
+ s64 ra;
+ nleft = copy_from_user(&ra, (const void __user *) rsp, 8);
+ if (unlikely(nleft != 0))
+ goto fail;
+ ra += correction;
+ nleft = copy_to_user((void __user *) rsp, &ra, 8);
+ if (unlikely(nleft != 0))
+ goto fail;
+ }
+ return;
+
+fail:
+ printk(KERN_ERR
+ "uprobes: Failed to adjust return address after"
+ " single-stepping call instruction;"
+ " pid=%d, rsp=%#lx\n", current->pid, rsp);
+ utask->doomed = 1;
+}
+
+/*
+ * Called after single-stepping. ppt->vaddr is the address of the
+ * instruction whose first byte has been replaced by the "int3"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is utask->singlestep_addr.
+ *
+ * This function prepares to return from the post-single-step
+ * trap. We have to fix things up as follows:
+ *
+ * 0) Typically, the new rip is relative to the copied instruction. We
+ * need to make it relative to the original instruction. Exceptions are
+ * return instructions and absolute or indirect jump or call instructions.
+ *
+ * 1) If the single-stepped instruction was a call, the return address
+ * that is atop the stack is the address following the copied instruction.
+ * We need to make it the address following the original instruction.
+ *
+ * 2) If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
+ * We need to restore the contents of the scratch register and adjust
+ * the rip, keeping in mind that the instruction we executed is 4 bytes
+ * shorter than the original instruction (since we squeezed out the offset
+ * field).
+ */
+static
+void uprobe_post_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
+ struct pt_regs *regs)
+{
+ unsigned long next_rip = 0;
+ unsigned long copy_rip = utask->singlestep_addr;
+ unsigned long orig_rip = ppt->vaddr;
+ long correction = (long) (orig_rip - copy_rip);
+ uprobe_opcode_t *insn = ppt->insn;
+ unsigned long flags = ppt->arch_info.flags;
+
+ up_read(&ppt->slot->rwsem);
+
+ if (flags & (UPFIX_RIP_RAX | UPFIX_RIP_RCX)) {
+ if (flags & UPFIX_RIP_RAX)
+ regs->rax = utask->arch_info.saved_scratch_register;
+ else
+ regs->rcx = utask->arch_info.saved_scratch_register;
+ regs->rip += (4 + correction);
+ return;
+ }
+
+ /*
+ * TODO: Move all this instruction parsing to
+ * arch_validate_probed_insn(), and store what we learn in
+ * ppt->arch_info.flags.
+ *
+ * We don't bother skipping prefixes here because none of the
+ * non-rip-relative instructions that require special treatment
+ * involve prefixes.
+ */
+
+ switch (*insn) {
+ case 0xc3: /* ret/lret */
+ case 0xcb:
+ case 0xc2:
+ case 0xca:
+ /* rip is correct */
+ next_rip = regs->rip;
+ break;
+ case 0xe8: /* call relative - Fix return addr */
+ adjust_ret_addr(regs->rsp, correction, utask);
+ break;
+ case 0xff:
+ if ((insn[1] & 0x30) == 0x10) {
+ /* call absolute, indirect */
+ /* Fix return addr; rip is correct. */
+ next_rip = regs->rip;
+ adjust_ret_addr(regs->rsp, correction, utask);
+ } else if ((insn[1] & 0x31) == 0x20 || /* jmp near, absolute indirect */
+ (insn[1] & 0x31) == 0x21) { /* jmp far, absolute indirect */
+ /* rip is correct. */
+ next_rip = regs->rip;
+ }
+ break;
+ case 0xea: /* jmp absolute -- rip is correct */
+ next_rip = regs->rip;
+ break;
+ default:
+ break;
+ }
+
+ if (next_rip)
+ regs->rip = next_rip;
+ else
+ regs->rip += correction;
+}
+
+/*
+ * Replace the return address with the trampoline address. Returns
+ * the original return address.
+ */
+static
+unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
+ struct pt_regs *regs, struct uprobe_task *utask)
+{
+ int nleft;
+ unsigned long orig_ret_addr = 0; /* clear high bits for 32-bit apps */
+ size_t rasize;
+
+ if (is_32bit_app(current))
+ rasize = 4;
+ else
+ rasize = 8;
+ nleft = copy_from_user(&orig_ret_addr, (const void __user *) regs->rsp,
+ rasize);
+ if (unlikely(nleft != 0))
+ return 0;
+ if (orig_ret_addr == trampoline_address)
+ /*
+ * There's another uretprobe on this function, and it was
+ * processed first, so the return address has already
+ * been hijacked.
+ */
+ return orig_ret_addr;
+
+ nleft = copy_to_user((void __user *) regs->rsp, &trampoline_address,
+ rasize);
+ if (unlikely(nleft != 0)) {
+ if (nleft != rasize) {
+ printk(KERN_ERR "uretprobe_entry_handler: "
+ "return address partially clobbered -- "
+ "pid=%d, %%rsp=%#lx, %%rip=%#lx\n",
+ current->pid, regs->rsp, regs->rip);
+ utask->doomed = 1;
+ } // else nothing written, so no harm
+ return 0;
+ }
+ return orig_ret_addr;
+}
diff --git a/runtime/uprobes/uprobes_x86_64.h b/runtime/uprobes/uprobes_x86_64.h
new file mode 100644
index 00000000..c9345073
--- /dev/null
+++ b/runtime/uprobes/uprobes_x86_64.h
@@ -0,0 +1,84 @@
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * Userspace Probes (UProbes)
+ * include/asm-x86_64/uprobes.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <asm/thread_info.h>
+
+/* Normally defined in Kconfig */
+#define CONFIG_URETPROBES 1
+#define CONFIG_UPROBES_SSOL 1
+
+typedef u8 uprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION 0xcc
+#define BP_INSN_SIZE 1
+#define MAX_UINSN_BYTES 16
+
+// SLOT_IP should be 16 for 64-bit apps (include/asm-x86_64/elf.h)
+// but 12 for 32-bit apps (arch/x86_64/ia32/ia32_binfmt.c)
+#define SLOT_IP(tsk) (test_tsk_thread_flag(tsk, TIF_IA32) ? 12 : 16)
+
+#define BREAKPOINT_SIGNAL SIGTRAP
+#define SSTEP_SIGNAL SIGTRAP
+
+/* Architecture specific switch for where the IP points after a bp hit */
+#define ARCH_BP_INST_PTR(inst_ptr) (inst_ptr - BP_INSN_SIZE)
+
+#define UPFIX_RIP_RAX 0x1 /* (%rip) insn rewritten to use (%rax) */
+#define UPFIX_RIP_RCX 0x2 /* (%rip) insn rewritten to use (%rcx) */
+
+struct uprobe_probept_arch_info {
+ unsigned long flags;
+ unsigned long rip_target_address;
+};
+
+struct uprobe_task_arch_info {
+ unsigned long saved_scratch_register;
+};
+
+struct uprobe_probept;
+struct uprobe_task;
+
+static int arch_validate_probed_insn(struct uprobe_probept *ppt,
+ struct task_struct *tsk);
+
+/* On x86_64, the int3 traps leaves rip pointing past the int3 instruction. */
+static inline unsigned long arch_get_probept(struct pt_regs *regs)
+{
+ return (unsigned long) (regs->rip - BP_INSN_SIZE);
+}
+
+static inline void arch_reset_ip_for_sstep(struct pt_regs *regs)
+{
+ regs->rip -= BP_INSN_SIZE;
+}
+
+static inline void arch_restore_uret_addr(unsigned long ret_addr,
+ struct pt_regs *regs)
+{
+ regs->rip = ret_addr;
+}
+
+static unsigned long arch_hijack_uret_addr(unsigned long trampoline_addr,
+ struct pt_regs*, struct uprobe_task*);
+#endif /* _ASM_UPROBES_H */