summaryrefslogtreecommitdiffstats
path: root/whichasm-0.01/classifier_x86.c
blob: a669d81d459b6b11eb41fe8bf77feca545186b70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/* 
 * Classifier for x86 assembly language
 *
 * Copyright (C) 2012 Red Hat, Inc.
 *
 */

/* x86 opcodes usually have one of two syntax forms (dest/source reversed):
 *    mnemonic <param1>, <param2>, <param3>...
 * Register names include the following:
 *    64-bit GPRS: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, R8-R15
 *    32-bit GPRS: EAX, EBX, ECX, EDX
 *    16-bit GPRS: AX, BX, CX, DX
 *    Segment registers: CS, DS, ES, FS, GS, SS
 *    Index and pointers: ESI, EDI, EBP, EIP, ESP, RIP
 *    Flags: FLAGS, EFLAGS, RFLAGS
 */

#include "x86.h"
#include <stdio.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "classifier.h"

/*
 * classifier_x86 - classify a token as an x86 mnemonic or register name.
 *
 * @token_name: NUL-terminated token to look up; may be NULL.
 *
 * The token is compared case-insensitively, and in full, against each
 * entry of x86_opcodes and then x86_regs.  Each table is walked until an
 * entry with a null assembler field is reached.
 *
 * Returns MNEMONIC if the token equals an opcode name, REGISTER if it
 * equals a register name, and UNKNOWN otherwise (including for a NULL
 * token).  Opcodes are checked first, so a name present in both tables
 * is reported as MNEMONIC.
 */
int classifier_x86(char *token_name)
{
	const struct x86_opcode *opcode;
	const struct x86_reg *reg;

	/* Nothing to classify; also keeps NULL away from strcasecmp(). */
	if (token_name == NULL)
		return UNKNOWN;

	// TODO - catch variants of assembly language mnemonics that have
	// appended letters; only whole-token matches are recognised for now.
	for (opcode = x86_opcodes; opcode->assembler; opcode++) {
		/*
		 * A whole-string, case-insensitive compare is equivalent to
		 * checking equal lengths and then comparing that many bytes,
		 * without re-measuring the token on every iteration.
		 */
		if (strcasecmp(token_name, opcode->assembler) == 0)
			return MNEMONIC;
	}

	for (reg = x86_regs; reg->assembler; reg++) {
		if (strcasecmp(token_name, reg->assembler) == 0)
			return REGISTER;
	}

	return UNKNOWN;
}