/*
 *  Userspace Probes (UProbes)
 *  arch/i386/kernel/uprobes_i386.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2006
 */
/*
 * In versions of uprobes built in the SystemTap runtime, this file
 * is #included at the end of uprobes.c.
 */
#include <linux/uaccess.h>

/* Adapted from arch/x86_64/kprobes.c */
#undef W
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
	static const unsigned long good_insns[256 / 32] = {
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
		/*      -------------------------------         */
		W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */
		W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0), /* 10 */
		W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */
		W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */
		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
		W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
		W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
		W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
		W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
		W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
		W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
		W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
		W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1), /* d0 */
		W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
		W(0xf0, 0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,1)  /* f0 */
		/*      -------------------------------         */
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	};

	static const unsigned long good_2byte_insns[256 / 32] = {
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
		/*      -------------------------------         */
		W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
		W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */
		W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
		W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
		W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
		W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
		W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
		W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
		W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
		W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
		W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */
		W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
		W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0)  /* f0 */
		/*      -------------------------------         */
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	};

/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 * in the different Groups and fpu instructions.
 * - Note: If we go past the first byte, do we need to verify that
 * subsequent bytes were actually there, rather than off the last page?
 * Probably overkill.  We don't verify that they specified the first byte
 * of the instruction, either.
 * - Be clearer about which instructions we'll never probe.
 */

/*
 * opcodes we'll probably never support:
 * 63 - arpl
 * 6c-6d, e4-e5, ec-ed - in
 * 6e-6f, e6-e7, ee-ef - out
 * cc, cd - int3, int
 * cf - iret
 * d6 - illegal instruction
 * f1 - int1/icebp
 * f4 - hlt
 * fa, fb - cli, sti
 *
 * opcodes we may need to refine support for:
 * 66 - data16 prefix
 * 8f - Group 1 - only reg = 0 is OK
 * c6-c7 - Group 11 - only reg = 0 is OK
 * d9-df - fpu insns with some illegal encodings
 * fe - Group 4 - only reg = 0 or 1 is OK
 * ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 * 07, 17, 1f - pop es, pop ss, pop ds
 * 26, 2e, 36, 3e, - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seems to be used, so we support them.
 * 67 - addr16 prefix
 * ce - into
 * f0 - lock prefix
 * f2, f3 - repnz, repz prefixes
 */

static
int arch_validate_probed_insn(struct uprobe_probept *ppt,
						struct task_struct *tsk)
{
	uprobe_opcode_t *insn = ppt->insn;

	if (insn[0] == 0x66)
		/* Skip operand-size prefix */
		insn++;
	if (test_bit(insn[0], good_insns))
		return 0;
	if (insn[0] == 0x0f) {
		if (test_bit(insn[1], good_2byte_insns))
			return 0;
		printk(KERN_ERR "uprobes does not currently support probing "
			"instructions with the 2-byte opcode 0x0f 0x%2.2x\n",
			insn[1]);
	} else 
		printk(KERN_ERR "uprobes does not currently support probing "
			"instructions whose first byte is 0x%2.2x\n", insn[0]);
	return -EPERM;
}

/*
 * Get an instruction slot from the process's SSOL area, containing the
 * instruction at ppt's probepoint.  Point the eip at that slot, in
 * preparation for single-stepping out of line.
 */
static
void uprobe_pre_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
		struct pt_regs *regs)
{
	struct uprobe_ssol_slot *slot;

	slot = uprobe_get_insn_slot(ppt);
	if (!slot) {
		utask->doomed = 1;
		return;
	}
	regs->eip = (long)slot->insn;
	utask->singlestep_addr = regs->eip;
}

/*
 * Called by uprobe_post_ssout() to adjust the return address
 * pushed by a call instruction executed out-of-line.
 */
static void adjust_ret_addr(long esp, long correction,
		struct uprobe_task *utask)
{
	int nleft;
	long ra;

	nleft = copy_from_user(&ra, (const void __user *) esp, 4);
	if (unlikely(nleft != 0))
		goto fail;
	ra +=  correction;
	nleft = copy_to_user((void __user *) esp, &ra, 4);
	if (unlikely(nleft != 0))
		goto fail;
	return;

fail:
	printk(KERN_ERR
		"uprobes: Failed to adjust return address after"
		" single-stepping call instruction;"
		" pid=%d, esp=%#lx\n", current->pid, esp);
	utask->doomed = 1;
}

/*
 * Called after single-stepping.  ppt->vaddr is the address of the
 * instruction whose first byte has been replaced by the "int3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is utask->singlestep_addr.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Typically, the new eip is relative to the copied instruction.  We
 * need to make it relative to the original instruction.  Exceptions are
 * return instructions and absolute or indirect jump or call instructions.
 *
 * 1) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
 */
static
void uprobe_post_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
		struct pt_regs *regs)
{
	long next_eip = 0;
	long copy_eip = utask->singlestep_addr;
	long orig_eip = ppt->vaddr;
	uprobe_opcode_t *insn = ppt->insn;

	up_read(&ppt->slot->rwsem);

	if (insn[0] == 0x66)
		/* Skip operand-size prefix */
		insn++;

	switch (insn[0]) {
	case 0xc3:		/* ret/lret */
	case 0xcb:
	case 0xc2:
	case 0xca:
		next_eip = regs->eip;
		/* eip is already adjusted, no more changes required*/
		break;
	case 0xe8:		/* call relative - Fix return addr */
		adjust_ret_addr(regs->esp, (orig_eip - copy_eip), utask);
		break;
	case 0xff:
		if ((insn[1] & 0x30) == 0x10) {
			/* call absolute, indirect */
			/* Fix return addr; eip is correct. */
			next_eip = regs->eip;
			adjust_ret_addr(regs->esp, (orig_eip - copy_eip),
				utask);
		} else if ((insn[1] & 0x31) == 0x20 ||
			   (insn[1] & 0x31) == 0x21) {
			/* jmp near or jmp far  absolute indirect */
			/* eip is correct. */
			next_eip = regs->eip;
		}
		break;
	case 0xea:		/* jmp absolute -- eip is correct */
		next_eip = regs->eip;
		break;
	default:
		break;
	}

	if (next_eip)
		regs->eip = next_eip;
	else
		regs->eip = orig_eip + (regs->eip - copy_eip);
}

/*
 * Replace the return address with the trampoline address.  Returns
 * the original return address.
 */
static
unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
		struct pt_regs *regs, struct uprobe_task *utask)
{
	int nleft;
	unsigned long orig_ret_addr;
#define RASIZE (sizeof(unsigned long))

	nleft = copy_from_user(&orig_ret_addr,
		       (const void __user *)regs->esp, RASIZE);
	if (unlikely(nleft != 0))
		return 0;

	if (orig_ret_addr == trampoline_address)
		/*
		 * There's another uretprobe on this function, and it was
		 * processed first, so the return address has already
		 * been hijacked.
		 */
		return orig_ret_addr;

	nleft = copy_to_user((void __user *)regs->esp,
		       &trampoline_address, RASIZE);
	if (unlikely(nleft != 0)) {
		if (nleft != RASIZE) {
			printk(KERN_ERR "uretprobe_entry_handler: "
					"return address partially clobbered -- "
					"pid=%d, %%esp=%#lx, %%eip=%#lx\n",
					current->pid, regs->esp, regs->eip);
			utask->doomed = 1;
		} /* else nothing written, so no harm */
		return 0;
	}
	return orig_ret_addr;
}

/*
 * On x86_32, if a function returns a struct or union, the return
 * value is copied into an area created by the caller.  The address
 * of this area is passed on the stack as a "hidden" first argument.
 * When such a function returns, it uses a "ret $4" instruction to pop
 * not only the return address but also the hidden arg.  To accommodate
 * such functions, we add 4 bytes of slop when predicting the return
 * address. See PR #10078.
 */
#define STRUCT_RETURN_SLOP 4

static
unsigned long arch_predict_sp_at_ret(struct pt_regs *regs,
		struct task_struct *tsk)
{
	return (unsigned long) (regs->esp + 4 + STRUCT_RETURN_SLOP);
}