/*
 * Classifier for x86 assembly language
 *
 * Copyright (C) 2012 Red Hat, Inc.
 *
 */

/* x86 opcodes usually have one of two syntax forms (dest/source reversed):
 *   mnemonic <operand>, <operand>, ...
 * Register names include the following:
 *   64-bit GPRS: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, R8-R15
 *   32-bit GPRS: EAX, EBX, ECX, EDX
 *   16-bit GPRS: AX, BX, CX, DX
 *   Segment registers: CS, DS, ES, FS, GS, SS
 *   Index and pointers: ESI, EDI, EBP, EIP, ESP, RIP
 *   Flags: FLAGS, EFLAGS, RFLAGS
 */

#include "x86.h"
/*
 * NOTE(review): the names of the system headers originally listed here were
 * not recoverable from the file as received. <string.h> and <strings.h> are
 * the ones this translation unit needs (NULL, strcasecmp); restore any other
 * headers the original file included.
 */
#include <string.h>
#include <strings.h>
#include "classifier.h"

/*
 * Classify a single token of x86 assembly text.
 *
 * The token is compared, ignoring case and over its whole length, first
 * against every entry of x86_opcodes and then against every entry of
 * x86_regs.  Both tables are walked until an entry whose `assembler` field
 * is NULL is reached.
 *
 * token_name: NUL-terminated token to classify; a NULL pointer is treated
 *             as an unrecognized token.
 *
 * Returns MNEMONIC when the token matches an opcode name, REGISTER when it
 * matches a register name, and UNKNOWN otherwise.  Opcodes take precedence
 * because they are searched first.
 */
int classifier_x86(char *token_name)
{
	const struct x86_opcode *opcode;
	const struct x86_reg *reg;

	/* Guard against a NULL token rather than passing it to strcasecmp. */
	if (token_name == NULL) {
		return UNKNOWN;
	}

	/*
	 * TODO - catch variants of assembly language mnemonics.
	 * Matching is currently exact (whole token, case-insensitive), so
	 * mnemonics that carry appended letters are not recognized.
	 */
	for (opcode = x86_opcodes; opcode->assembler; opcode++) {
		/*
		 * strcasecmp is equivalent to the former "lengths equal and
		 * strncasecmp over that length" test, without recomputing
		 * strlen for every table entry.
		 */
		if (strcasecmp(token_name, opcode->assembler) == 0) {
			return MNEMONIC;
		}
	}

	for (reg = x86_regs; reg->assembler; reg++) {
		if (strcasecmp(reg->assembler, token_name) == 0) {
			return REGISTER;
		}
	}

	return UNKNOWN;
}