1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
/*
* Classifier for x86 assembly language
*
* Copyright (C) 2012 Red Hat, Inc.
*
*/
/* x86 opcodes usually have one of two syntax forms (dest/source reversed):
* mnemonic <param1>, <param2>, <param3>...
* Register names include the following:
* 64-bit GPRS: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, R8-R15
* 32-bit GPRS: EAX, EBX, ECX, EDX
* 16-bit GPRS: AX, BX, CX, DX
* Segment registers: CS, DS, ES, FS, GS, SS
* Index and pointers: ESI, EDI, EBP, EIP, ESP, RIP
* Flags: FLAGS, EFLAGS, RFLAGS
*/
#include "x86.h"
#include <stdio.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "classifier.h"
/*
 * Classify a single token as an x86 mnemonic, an x86 register, or neither.
 *
 * token_name: NUL-terminated token to look up.  The comparison is
 *             case-insensitive and must match the whole token.  A NULL
 *             pointer is treated as "no match".
 *
 * The token is checked against every entry of x86_opcodes first, then
 * against every entry of x86_regs.  Each table is walked until an entry
 * whose `assembler` field is null.
 *
 * Returns MNEMONIC on an opcode match, REGISTER on a register match, and
 * UNKNOWN otherwise.
 */
int classifier_x86(char *token_name)
{
	const struct x86_opcode *opcode;
	const struct x86_reg *reg;

	/* The string comparisons below must never be handed a NULL pointer. */
	if (token_name == NULL) {
		return UNKNOWN;
	}

	/*
	 * TODO - catch variants of assembly language mnemonics: tokens that
	 * have letters appended to a known mnemonic are not recognised yet,
	 * only exact (case-insensitive) matches are.
	 */
	for (opcode = x86_opcodes; opcode->assembler; opcode++) {
		/*
		 * strcasecmp() compares the full strings, which is what the
		 * former "equal length + strncasecmp()" pair expressed, without
		 * re-measuring both strings for every table entry.
		 */
		if (strcasecmp(token_name, opcode->assembler) == 0) {
			return MNEMONIC;
		}
	}

	for (reg = x86_regs; reg->assembler; reg++) {
		if (strcasecmp(token_name, reg->assembler) == 0) {
			return REGISTER;
		}
	}

	return UNKNOWN;
}
|