NAME
libdisasm - library for disassembling Intel x86 instructions
OVERVIEW
libdisasm is a library for disassembling compiled Intel x86 object code.
FUNCTIONS
int x86_init( enum x86_options options, DISASM_REPORTER reporter);
void x86_set_options( enum x86_options options );
int x86_get_options( void );
int x86_cleanup(void);
void x86_report_error( enum x86_report_codes code, void *arg );
int x86_disasm( unsigned char *buf, unsigned int buf_len,
unsigned long buf_rva, unsigned int offset,
x86_insn_t * insn );
int x86_disasm_range( unsigned char *buf, unsigned long buf_rva,
unsigned int offset, unsigned int len,
DISASM_CALLBACK func, void *arg );
int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
unsigned long buf_rva, unsigned int offset,
DISASM_CALLBACK func, void *arg,
DISASM_RESOLVER resolver );
x86_op_t * x86_get_operand( x86_insn_t *insn, enum x86_operand_id id );
x86_op_t * x86_get_dest_operand( x86_insn_t *insn );
x86_op_t * x86_get_src_operand( x86_insn_t *insn );
x86_op_t * x86_get_imm_operand( x86_insn_t *insn );
int x86_operand_size( x86_op_t *op );
unsigned char * x86_get_raw_imm( x86_insn_t *insn );
void x86_set_insn_addr( x86_insn_t *insn, unsigned long addr );
void x86_set_insn_offset( x86_insn_t *insn, unsigned int offset );
void x86_set_insn_function( x86_insn_t *insn, void * func );
void x86_set_insn_block( x86_insn_t *insn, void * block );
void x86_tag_insn( x86_insn_t *insn );
void x86_untag_insn( x86_insn_t *insn );
int x86_insn_is_tagged( x86_insn_t *insn );
int x86_format_operand(x86_op_t *op, x86_insn_t *insn,
char *buf, int len, enum x86_asm_format);
int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
enum x86_asm_format);
int x86_format_insn(x86_insn_t *insn, char *buf, int len,
enum x86_asm_format);
int x86_format_header( char *buf, int len, enum x86_asm_format format);
int x86_endian(void);
int x86_addr_size(void);
int x86_op_size(void);
int x86_word_size(void);
int x86_max_inst_size(void);
int x86_sp_reg(void);
int x86_fp_reg(void);
int x86_ip_reg(void);
int x86_invariant_insn( unsigned char *buf, int buf_len,
x86_invariant_t *inv );
DATA TYPES
#define MAX_REGNAME 8
#define MAX_INSN_SIZE 20 /* same as in i386.h */
#define MAX_OP_STRING 32 /* max possible operand size in string form */
enum x86_reg_type { /* NOTE: these may be ORed together */
reg_gen = 0x00001, /* general purpose */
reg_in = 0x00002, /* incoming args, ala RISC */
reg_out = 0x00004, /* args to calls, ala RISC */
reg_local = 0x00008, /* local vars, ala RISC */
reg_fpu = 0x00010, /* FPU data register */
reg_seg = 0x00020, /* segment register */
reg_simd = 0x00040, /* SIMD/MMX reg */
reg_sys = 0x00080, /* restricted/system register */
reg_sp = 0x00100, /* stack pointer */
reg_fp = 0x00200, /* frame pointer */
reg_pc = 0x00400, /* program counter */
reg_retaddr = 0x00800, /* return addr for func */
reg_cond = 0x01000, /* condition code / flags */
reg_zero = 0x02000, /* zero register, ala RISC */
reg_ret = 0x04000, /* return value */
reg_src = 0x10000, /* array/rep source */
reg_dest = 0x20000, /* array/rep destination */
reg_count = 0x40000 /* array/rep/loop counter */
};
typedef struct {
char name[MAX_REGNAME];
int type; /* what register is used for */
int size; /* size of register in bytes */
int id; /* register ID #, for quick compares */
} x86_reg_t;
typedef struct {
unsigned int scale; /* scale factor */
x86_reg_t index, base; /* index, base registers */
long disp; /* displacement */
char disp_sign; /* is negative? 1/0 */
char disp_size; /* 0, 1, 2, 4 */
} x86_ea_t;
enum x86_op_type { /* mutually exclusive */
op_unused = 0, /* empty/unused operand */
op_register = 1, /* CPU register */
op_immediate = 2, /* Immediate Value */
op_relative = 3, /* Relative offset from IP */
op_absolute = 4, /* Absolute address (ptr16:32) */
op_expression = 5, /* Address expression (scale/index/base/disp) */
op_offset = 6, /* Offset from start of segment (m32) */
op_unknown
};
enum x86_op_datatype { /* these use Intel’s lame terminology */
op_byte = 1, /* 1 byte integer */
op_word = 2, /* 2 byte integer */
op_dword = 3, /* 4 byte integer */
op_qword = 4, /* 8 byte integer */
op_dqword = 5, /* 16 byte integer */
op_sreal = 6, /* 4 byte real (single real) */
op_dreal = 7, /* 8 byte real (double real) */
op_extreal = 8, /* 10 byte real (extended real) */
op_bcd = 9, /* 10 byte binary-coded decimal */
op_simd = 10, /* 16 byte packed (SIMD, MMX) */
op_fpuenv = 11 /* 28 byte FPU control/environment data */
};
enum x86_op_access { /* ORed together */
op_read = 1,
op_write = 2,
op_execute = 4
};
enum x86_op_flags { /* ORed together, but segs are mutually exclusive */
op_signed = 1, /* signed integer */
op_string = 2, /* possible string or array */
op_constant = 4, /* symbolic constant */
op_pointer = 8, /* operand points to a memory address */
op_es_seg = 0x100, /* ES segment override */
op_cs_seg = 0x200, /* CS segment override */
op_ss_seg = 0x300, /* SS segment override */
op_ds_seg = 0x400, /* DS segment override */
op_fs_seg = 0x500, /* FS segment override */
op_gs_seg = 0x600 /* GS segment override */
};
typedef struct {
enum x86_op_type type; /* operand type */
enum x86_op_datatype datatype; /* operand size */
enum x86_op_access access; /* operand access [RWX] */
enum x86_op_flags flags; /* misc flags */
union {
/* immediate values */
char sbyte;
short sword;
long sdword;
unsigned char byte;
unsigned short word;
unsigned long dword;
qword sqword;
float sreal;
double dreal;
/* misc large/non-native types */
unsigned char extreal[10];
unsigned char bcd[10];
qword dqword[2];
unsigned char simd[16];
unsigned char fpuenv[28];
/* absolute address */
void * address;
/* offset from segment */
unsigned long offset;
/* ID of CPU register */
x86_reg_t reg;
/* offsets from current insn */
char near_offset;
long far_offset;
/* effective address [expression] */
x86_ea_t effective_addr;
} data;
} x86_op_t;
enum x86_insn_group {
insn_controlflow = 1,
insn_arithmetic = 2,
insn_logic = 3,
insn_stack = 4,
insn_comparison = 5,
insn_move = 6,
insn_string = 7,
insn_bit_manip = 8,
insn_flag_manip = 9,
insn_fpu = 10,
insn_interrupt = 13,
insn_system = 14,
insn_other = 15
};
enum x86_insn_type {
/* insn_controlflow */
insn_jmp = 0x1001,
insn_jcc = 0x1002,
insn_call = 0x1003,
insn_callcc = 0x1004,
insn_return = 0x1005,
insn_loop = 0x1006,
/* insn_arithmetic */
insn_add = 0x2001,
insn_sub = 0x2002,
insn_mul = 0x2003,
insn_div = 0x2004,
insn_inc = 0x2005,
insn_dec = 0x2006,
insn_shl = 0x2007,
insn_shr = 0x2008,
insn_rol = 0x2009,
insn_ror = 0x200A,
/* insn_logic */
insn_and = 0x3001,
insn_or = 0x3002,
insn_xor = 0x3003,
insn_not = 0x3004,
insn_neg = 0x3005,
/* insn_stack */
insn_push = 0x4001,
insn_pop = 0x4002,
insn_pushregs = 0x4003,
insn_popregs = 0x4004,
insn_pushflags = 0x4005,
insn_popflags = 0x4006,
insn_enter = 0x4007,
insn_leave = 0x4008,
/* insn_comparison */
insn_test = 0x5001,
insn_cmp = 0x5002,
/* insn_move */
insn_mov = 0x6001, /* move */
insn_movcc = 0x6002, /* conditional move */
insn_xchg = 0x6003, /* exchange */
insn_xchgcc = 0x6004, /* conditional exchange */
/* insn_string */
insn_strcmp = 0x7001,
insn_strload = 0x7002,
insn_strmov = 0x7003,
insn_strstore = 0x7004,
insn_translate = 0x7005, /* xlat */
/* insn_bit_manip */
insn_bittest = 0x8001,
insn_bitset = 0x8002,
insn_bitclear = 0x8003,
/* insn_flag_manip */
insn_clear_carry = 0x9001,
insn_clear_zero = 0x9002,
insn_clear_oflow = 0x9003,
insn_clear_dir = 0x9004,
insn_clear_sign = 0x9005,
insn_clear_parity = 0x9006,
insn_set_carry = 0x9007,
insn_set_zero = 0x9008,
insn_set_oflow = 0x9009,
insn_set_dir = 0x900A,
insn_set_sign = 0x900B,
insn_set_parity = 0x900C,
insn_tog_carry = 0x9010,
insn_tog_zero = 0x9020,
insn_tog_oflow = 0x9030,
insn_tog_dir = 0x9040,
insn_tog_sign = 0x9050,
insn_tog_parity = 0x9060,
/* insn_fpu */
insn_fmov = 0xA001,
insn_fmovcc = 0xA002,
insn_fneg = 0xA003,
insn_fabs = 0xA004,
insn_fadd = 0xA005,
insn_fsub = 0xA006,
insn_fmul = 0xA007,
insn_fdiv = 0xA008,
insn_fsqrt = 0xA009,
insn_fcmp = 0xA00A,
insn_fcos = 0xA00C,
insn_fldpi = 0xA00D,
insn_fldz = 0xA00E,
insn_ftan = 0xA00F,
insn_fsine = 0xA010,
insn_fsys = 0xA020,
/* insn_interrupt */
insn_int = 0xD001,
insn_intcc = 0xD002, /* not present in x86 ISA */
insn_iret = 0xD003,
insn_bound = 0xD004,
insn_debug = 0xD005,
insn_trace = 0xD006,
insn_invalid_op = 0xD007,
insn_oflow = 0xD008,
/* insn_system */
insn_halt = 0xE001,
insn_in = 0xE002, /* input from port/bus */
insn_out = 0xE003, /* output to port/bus */
insn_cpuid = 0xE004,
/* insn_other */
insn_nop = 0xF001,
insn_bcdconv = 0xF002, /* convert to or from BCD */
insn_szconv = 0xF003 /* change size of operand */
};
enum x86_flag_status {
insn_carry_set = 0x1,
insn_zero_set = 0x2,
insn_oflow_set = 0x4,
insn_dir_set = 0x8,
insn_sign_set = 0x10,
insn_parity_set = 0x20,
insn_carry_or_zero_set = 0x40,
insn_zero_set_or_sign_ne_oflow = 0x80,
insn_carry_clear = 0x100,
insn_zero_clear = 0x200,
insn_oflow_clear = 0x400,
insn_dir_clear = 0x800,
insn_sign_clear = 0x1000,
insn_parity_clear = 0x2000,
insn_sign_eq_oflow = 0x4000,
insn_sign_ne_oflow = 0x8000
};
enum x86_insn_prefix {
insn_no_prefix = 0,
insn_rep_zero = 1,
insn_rep_notzero = 2,
insn_lock = 4,
insn_delay = 8
};
enum x86_operand_id { op_dest=0, op_src=1, op_imm=2 };
typedef struct {
/* information about the instruction */
unsigned long addr; /* load address */
unsigned long offset; /* offset into file/buffer */
enum x86_insn_group group; /* meta-type, e.g. insn_controlflow */
enum x86_insn_type type; /* type, e.g. insn_jmp */
unsigned char bytes[MAX_INSN_SIZE];
unsigned char size; /* size of insn in bytes */
enum x86_insn_prefix prefix;
enum x86_flag_status flags_set; /* flags set or tested by insn */
enum x86_flag_status flags_tested;
/* the instruction proper */
char prefix_string[32]; /* prefixes [might be truncated] */
char mnemonic[8];
x86_op_t operands[3];
/* convenience fields for user */
void *block; /* code block containing this insn */
void *function; /* function containing this insn */
void *tag; /* tag the insn as seen/processed */
} x86_insn_t;
#define X86_WILDCARD_BYTE 0xF4
typedef struct {
enum x86_op_type type; /* operand type */
enum x86_op_datatype datatype; /* operand size */
enum x86_op_access access; /* operand access [RWX] */
enum x86_op_flags flags; /* misc flags */
} x86_invariant_op_t;
typedef struct {
unsigned char bytes[64]; /* invariant representation */
unsigned int size; /* number of bytes in insn */
enum x86_insn_group group; /* meta-type, e.g. insn_controlflow */
enum x86_insn_type type; /* type, e.g. insn_jmp */
x86_invariant_op_t operands[3]; /* use same ordering as x86_insn_t */
} x86_invariant_t;
typedef long (*DISASM_RESOLVER)( x86_op_t *op,
x86_insn_t * current_insn );
typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg );
typedef void (*DISASM_REPORTER)( enum x86_report_codes code,
void *arg );
enum x86_options {
opt_none= 0,
opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */
opt_16_bit=2, /* 16-bit/DOS disassembly */
opt_unknown
};
enum x86_report_codes {
report_disasm_bounds,
report_insn_bounds,
report_invalid_insn,
report_unknown
};
EXAMPLES
See programs quikdis.c, testdis.c, and x86dis.c in the source code
distribution.
NOTES
The x86-64 architecture is not supported.
AUTHOR
mammon_ <mammon_@users.sourceforge.net>
CONTRIBUTORS
xavier <xvr@users.sourceforge.net>, mok <moren_k@users.sourceforge.net>
SEE ALSO
x86_disasm(3), x86_format_insn(3), x86_init(3), x86dis(1)