Man Linux: Main Page and Category List

NAME

       x86_disasm,   x86_disasm_forward,   x86_disasm_range  -  disassemble  a
       bytestream to x86 assembly language instructions

SYNOPSIS

       #include <libdis.h>

       typedef void (*DISASM_CALLBACK)( x86_insn_t *, void * );
       typedef long (*DISASM_RESOLVER)( x86_op_t *, x86_insn_t *, void * );

       int x86_disasm( unsigned char *buf, unsigned int buf_len,
                 unsigned long buf_rva, unsigned int offset,
                 x86_insn_t * insn );
       int x86_disasm_range( unsigned char *buf, unsigned long buf_rva,
                       unsigned int offset, unsigned int len,
                       DISASM_CALLBACK func, void *arg );
       int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
                      unsigned long buf_rva, unsigned int offset,
                      DISASM_CALLBACK func, void *arg,
                      DISASM_RESOLVER resolver, void *r_arg );

DESCRIPTION

       /* Buffer-size limits used by the structures and formatters below. */
       #define MAX_REGNAME 8
       #define MAX_PREFIX_STR 32
       #define MAX_MNEM_STR 16
       #define MAX_INSN_SIZE 20 /* same as in i386.h */
       #define MAX_OP_STRING 32 /* max possible operand size in string form */
       #define  MAX_OP_RAW_STRING  64     /* max possible operand size in raw form */
       #define MAX_OP_XML_STRING 256   /* max possible operand size in xml form */
       #define  MAX_NUM_OPERANDS  8    /* max # implicit and explicit operands */
       #define MAX_INSN_STRING 512        /* 2 * 8 * MAX_OP_STRING */
       #define MAX_INSN_RAW_STRING 1024   /* 2 * 8 * MAX_OP_RAW_STRING */
       #define MAX_INSN_XML_STRING 4096   /* 2 * 8 * MAX_OP_XML_STRING */

       enum x86_reg_type {      /* NOTE: these may be ORed together */
            reg_gen     = 0x00001,   /* general purpose */
            reg_in      = 0x00002,   /* incoming args, ala RISC */
            reg_out     = 0x00004,   /* args to calls, ala RISC */
            reg_local   = 0x00008,   /* local vars, ala RISC */
            reg_fpu     = 0x00010,   /* FPU data register */
            reg_seg     = 0x00020,   /* segment register */
            reg_simd    = 0x00040,   /* SIMD/MMX reg */
            reg_sys     = 0x00080,   /* restricted/system register */
            reg_sp      = 0x00100,   /* stack pointer */
            reg_fp      = 0x00200,   /* frame pointer */
            reg_pc      = 0x00400,   /* program counter */
            reg_retaddr = 0x00800,   /* return addr for func */
            reg_cond    = 0x01000,   /* condition code / flags */
            reg_zero    = 0x02000,   /* zero register, ala RISC */
            reg_ret     = 0x04000,   /* return value */
            reg_src     = 0x10000,   /* array/rep source */
            reg_dest    = 0x20000,   /* array/rep destination */
            reg_count   = 0x40000    /* array/rep/loop counter */
       };

       /* Description of a single register, including its alias relationship
          to another register (alias/shift). */
       typedef struct {
            char name[MAX_REGNAME];
            enum x86_reg_type type;            /* what register is used for */
            unsigned int size;            /* size of register in bytes */
            unsigned  int  id;                   /* register ID #, for quick compares */
            unsigned int alias;           /* ID of reg this is an alias of */
            unsigned int shift;           /* amount to shift aliased reg by */
       } x86_reg_t;

       typedef struct {
            unsigned int     scale;       /* scale factor */
            x86_reg_t        index, base; /* index, base registers */
            long             disp;        /* displacement */
            char             disp_sign;   /* is negative? 1/0 */
            char             disp_size;   /* 0, 1, 2, 4 */
       } x86_ea_t;

       /* Kind of operand; selects which member of x86_op_t.data is meaningful. */
       enum x86_op_type {  /* mutually exclusive */
            op_unused = 0,      /* empty/unused operand */
            op_register = 1,    /* CPU register */
            op_immediate = 2,   /* Immediate Value */
            op_relative_near = 3,    /* Relative offset from IP */
            op_relative_far = 4,     /* NOTE(review): presumably the wider relative offset from IP -- confirm */
            op_absolute = 5,    /* Absolute address (ptr16:32) */
            op_expression  = 6,  /* Address expression (scale/index/base/disp) */
            op_offset = 7,      /* Offset from start of segment (m32) */
            op_unknown          /* no explicit value: takes 8, one past op_offset */
       };

       /* Size/format of an operand's data.
          NOTE(review): two pairs of enumerators share a value in this listing
          (op_simd/op_ssimd = 10, op_dsimd/op_fpuenv = 11), so they cannot be
          told apart by value. The first pair may be an intentional alias; the
          second looks like a transcription error -- confirm against libdis.h. */
       enum x86_op_datatype {        /* these use Intel's lame terminology */
            op_byte = 1,        /* 1 byte integer */
            op_word = 2,        /* 2 byte integer */
            op_dword = 3,       /* 4 byte integer */
            op_qword = 4,       /* 8 byte integer */
            op_dqword = 5,      /* 16 byte integer */
            op_sreal = 6,       /* 4 byte real (single real) */
            op_dreal = 7,       /* 8 byte real (double real) */
            op_extreal = 8,          /* 10 byte real (extended real) */
            op_bcd = 9,         /* 10 byte binary-coded decimal */
            op_simd = 10,       /* 16 byte packed (SIMD, MMX) */
               op_ssimd = 10,          /* 16 byte : 4 packed single FP (SIMD, MMX) */
               op_dsimd  = 11,          /* 16 byte : 2 packed double FP (SIMD, MMX) */
               op_sssimd = 12,         /* 4 byte : scalar single FP (SIMD, MMX) */
               op_sdsimd  =  13,          /* 8 byte : scalar double FP (SIMD, MMX) */
            op_descr32 = 14,    /* 6 byte Intel descriptor 2:4 */
            op_descr16 = 15,    /* 4 byte Intel descriptor 2:2 */
            op_pdescr32 = 16,   /* 6 byte Intel pseudo-descriptor 32:16 */
            op_pdescr16 = 17,   /* 6 byte Intel pseudo-descriptor 8:24:16 */
            op_fpuenv = 11      /* 28 byte FPU control/environment data; same value as op_dsimd */
       };

       enum x86_op_access {     /* ORed together */
            op_read = 1,
            op_write = 2,
            op_execute = 4
       };

       /* Miscellaneous operand attributes.  The low flags (1 .. 0x040) are
          single bits.  The segment overrides are the values 1..6 stored in
          bits 8-10 (0x100 .. 0x600), not independent bits, so test them by
          comparing the masked field rather than by ANDing with one value. */
       enum x86_op_flags { /* ORed together, but segs are mutually exclusive */
            op_signed = 1,      /* signed integer */
            op_string = 2,      /* possible string or array */
            op_constant = 4,    /* symbolic constant */
            op_pointer = 8,          /* operand points to a memory address */
            op_sysref = 0x010,  /* operand is a syscall number */
            op_implied = 0x020, /* operand is implicit in insn */
            op_hardcode = 0x040, /* operand is hardcoded in insn */
            op_es_seg = 0x100,  /* ES segment override */
            op_cs_seg = 0x200,  /* CS segment override */
            op_ss_seg = 0x300,  /* SS segment override */
            op_ds_seg = 0x400,  /* DS segment override */
            op_fs_seg = 0x500,  /* FS segment override */
            op_gs_seg = 0x600   /* GS segment override */
       };

       typedef struct {
            enum x86_op_type    type;          /* operand type */
            enum x86_op_datatype     datatype; /* operand size */
            enum x86_op_access  access;        /* operand access [RWX] */
            enum x86_op_flags   flags;         /* misc flags */
            union {
                 /* immediate values */
                 char           sbyte;
                 short          sword;
                 long           sdword;
                 qword          sqword;
                 unsigned char  byte;
                 unsigned short      word;
                 unsigned long  dword;
                 qword               qword;
                 float             sreal;
                 double            dreal;
                 /* misc large/non-native types */
                 unsigned char  extreal[10];
                 unsigned char  bcd[10];
                 qword          dqword[2];
                 unsigned char  simd[16];
                 unsigned char  fpuenv[28];
                 /* absolute address */
                 void           * address;
                 /* offset from segment */
                 unsigned long  offset;
                 /* ID of CPU register */
                 x86_reg_t reg;
                 /* offsets from current insn */
                 char           relative_near;
                 long           relative_far;
                 /* effective address [expression] */
                 x86_ea_t  expression;
            } data;
            void * insn;
       } x86_op_t;

       typedef struct x86_operand_list {
            x86_op_t op;
            struct x86_operand_list *next;
       } x86_oplist_t;

       enum x86_insn_group {
            insn_none = 0,
            insn_controlflow = 1,
            insn_arithmetic = 2,
            insn_logic = 3,
            insn_stack = 4,
            insn_comparison = 5,
            insn_move = 6,
            insn_string = 7,
            insn_bit_manip = 8,
            insn_flag_manip = 9,
            insn_fpu = 10,
            insn_interrupt = 13,
            insn_system = 14,
            insn_other = 15
       };

       enum x86_insn_type {
            insn_invalid = 0,
            /* insn_controlflow */
            insn_jmp = 0x1001,
            insn_jcc = 0x1002,
            insn_call = 0x1003,
            insn_callcc = 0x1004,
            insn_return = 0x1005,
            insn_loop = 0x1006,
            /* insn_arithmetic */
            insn_add = 0x2001,
            insn_sub = 0x2002,
            insn_mul = 0x2003,
            insn_div = 0x2004,
            insn_inc = 0x2005,
            insn_dec = 0x2006,
            insn_shl = 0x2007,
            insn_shr = 0x2008,
            insn_rol = 0x2009,
            insn_ror = 0x200A,
            /* insn_logic */
            insn_and = 0x3001,
            insn_or = 0x3002,
            insn_xor = 0x3003,
            insn_not = 0x3004,
            insn_neg = 0x3005,
            /* insn_stack */
            insn_push = 0x4001,
            insn_pop = 0x4002,
            insn_pushregs = 0x4003,
            insn_popregs = 0x4004,
            insn_pushflags = 0x4005,
            insn_popflags = 0x4006,
            insn_enter = 0x4007,
            insn_leave = 0x4008,
            /* insn_comparison */
            insn_test = 0x5001,
            insn_cmp = 0x5002,
            /* insn_move */
            insn_mov = 0x6001,  /* move */
            insn_movcc = 0x6002,     /* conditional move */
            insn_xchg = 0x6003, /* exchange */
            insn_xchgcc = 0x6004,    /* conditional exchange */
            /* insn_string */
            insn_strcmp = 0x7001,
            insn_strload = 0x7002,
            insn_strmov = 0x7003,
            insn_strstore = 0x7004,
            insn_translate = 0x7005, /* xlat */
            /* insn_bit_manip */
            insn_bittest = 0x8001,
            insn_bitset = 0x8002,
            insn_bitclear = 0x8003,
            /* insn_flag_manip */
            insn_clear_carry = 0x9001,
            insn_clear_zero = 0x9002,
            insn_clear_oflow = 0x9003,
            insn_clear_dir = 0x9004,
            insn_clear_sign = 0x9005,
            insn_clear_parity = 0x9006,
            insn_set_carry = 0x9007,
            insn_set_zero = 0x9008,
            insn_set_oflow = 0x9009,
            insn_set_dir = 0x900A,
            insn_set_sign = 0x900B,
            insn_set_parity = 0x900C,
            insn_tog_carry = 0x9010,
            insn_tog_zero = 0x9020,
            insn_tog_oflow = 0x9030,
            insn_tog_dir = 0x9040,
            insn_tog_sign = 0x9050,
            insn_tog_parity = 0x9060,
            /* insn_fpu */
            insn_fmov = 0xA001,
            insn_fmovcc = 0xA002,
            insn_fneg = 0xA003,
            insn_fabs = 0xA004,
            insn_fadd = 0xA005,
            insn_fsub = 0xA006,
            insn_fmul = 0xA007,
            insn_fdiv = 0xA008,
            insn_fsqrt = 0xA009,
            insn_fcmp = 0xA00A,
            insn_fcos = 0xA00C,
            insn_fldpi = 0xA00D,
            insn_fldz = 0xA00E,
            insn_ftan = 0xA00F,
            insn_fsine = 0xA010,
            insn_fsys = 0xA020,
            /* insn_interrupt */
            insn_int = 0xD001,
            insn_intcc = 0xD002,     /* not present in x86 ISA */
            insn_iret = 0xD003,
            insn_bound = 0xD004,
            insn_debug = 0xD005,
            insn_trace = 0xD006,
            insn_invalid_op = 0xD007,
            insn_oflow = 0xD008,
            /* insn_system */
            insn_halt = 0xE001,
            insn_in = 0xE002,   /* input from port/bus */
            insn_out = 0xE003,  /* output to port/bus */
            insn_cpuid = 0xE004,
            /* insn_other */
            insn_nop = 0xF001,
            insn_bcdconv = 0xF002,   /* convert to or from BCD */
            insn_szconv = 0xF003     /* change size of operand */
       };

       enum x86_insn_note {
            insn_note_ring0          = 1, /* Only available in ring 0 */
            insn_note_smm       = 2, /* "" in System Management Mode */
            insn_note_serial    = 4  /* Serializing instruction */
       };

       enum x86_flag_status {
            insn_carry_set = 0x1,
            insn_zero_set = 0x2,
            insn_oflow_set = 0x4,
            insn_dir_set = 0x8,
            insn_sign_set = 0x10,
            insn_parity_set = 0x20,
            insn_carry_or_zero_set = 0x40,
            insn_zero_set_or_sign_ne_oflow = 0x80,
            insn_carry_clear = 0x100,
            insn_zero_clear = 0x200,
            insn_oflow_clear = 0x400,
            insn_dir_clear = 0x800,
            insn_sign_clear = 0x1000,
            insn_parity_clear = 0x2000,
            insn_sign_eq_oflow = 0x4000,
            insn_sign_ne_oflow = 0x8000
       };

       /* CPU model identifiers.  Intel models are numbered sequentially
          from 1; the AMD models use 16, 32 and 48. */
       enum x86_insn_cpu {
            cpu_8086  = 1, /* Intel */
            cpu_80286 = 2,
            cpu_80386 = 3,
            cpu_80387 = 4,
            cpu_80486 = 5,
            cpu_pentium    = 6,
            cpu_pentiumpro = 7,
            cpu_pentium2   = 8,
            cpu_pentium3   = 9,
            cpu_pentium4   = 10,
            cpu_k6         = 16,     /* AMD */
            cpu_k7         = 32,
            cpu_athlon     = 48
       };

       enum x86_insn_isa {
            isa_gp         = 1, /* general purpose */
            isa_fp         = 2, /* floating point */
            isa_fpumgt     = 3, /* FPU/SIMD management */
            isa_mmx        = 4, /* Intel MMX */
            isa_sse1  = 5, /* Intel SSE SIMD */
            isa_sse2  = 6, /* Intel SSE2 SIMD */
            isa_sse3  = 7, /* Intel SSE3 SIMD */
            isa_3dnow = 8, /* AMD 3DNow! SIMD */
            isa_sys        = 9  /* system instructions */
       };

       enum x86_insn_prefix {
            insn_no_prefix = 0,
            insn_rep_zero = 1,
            insn_rep_notzero = 2,
            insn_lock = 4,
            insn_delay = 8
       };

       /* A single disassembled instruction: location, classification, raw
          bytes, flag effects, textual form, operands, and three user-owned
          pointers. */
       typedef struct {
            /* information about the instruction */
            unsigned long addr;      /* load address */
            unsigned long offset;         /* offset into file/buffer */
            enum x86_insn_group group;    /* meta-type, e.g. INSN_EXEC */
            enum x86_insn_type type; /* type, e.g. INSN_BRANCH */
            unsigned char bytes[MAX_INSN_SIZE];
            unsigned char size;      /* size of insn in bytes */
            enum x86_insn_prefix prefix;
            enum x86_flag_status flags_set; /* flags set or tested by insn */
            enum x86_flag_status flags_tested;
            /* the instruction proper */
            char prefix_string[32];       /* prefixes [might be truncated]; 32 == MAX_PREFIX_STR */
            /* NOTE(review): 8 differs from MAX_MNEM_STR (16) -- confirm against libdis.h */
            char mnemonic[8];
            /* NOTE(review): 3 slots, although MAX_NUM_OPERANDS is 8 -- confirm against libdis.h */
            x86_op_t operands[3];
            /* convenience fields for user */
            void *block;             /* code block containing this insn */
            void *function;               /* function containing this insn */
            void *tag;               /* tag the insn as seen/processed */
       } x86_insn_t;
       #define X86_WILDCARD_BYTE 0xF4

       /* Operand description without the operand data: the first four
          fields of x86_op_t (type, datatype, access, flags). */
       typedef struct {
               enum x86_op_type        type;           /* operand type */
               enum x86_op_datatype    datatype;       /* operand size */
               enum x86_op_access      access;         /* operand access [RWX] */
               enum x86_op_flags       flags;          /* misc flags */
       } x86_invariant_op_t;

       /* Invariant representation of an instruction.
          NOTE(review): presumably the variable bytes are replaced with
          X86_WILDCARD_BYTE -- confirm against the library documentation. */
       typedef struct {
            unsigned char bytes[64]; /* invariant representation */
            unsigned int  size;      /* number of bytes in insn */
               enum x86_insn_group group;      /* meta-type, e.g. INSN_EXEC */
               enum x86_insn_type type;        /* type, e.g. INSN_BRANCH */
            x86_invariant_op_t   operands[3];    /* use same ordering as x86_insn_t */
       } x86_invariant_t;

EXAMPLES

       The following sample callback outputs instructions in raw syntax:

       /* Sample DISASM_CALLBACK: formats insn with x86_format_insn() using
          raw_syntax and prints the result followed by a newline.
          arg is not used. */
       void raw_print( x86_insn_t *insn, void *arg ) {
               char line[1024];
               (void) arg;
               x86_format_insn(insn, line, 1024, raw_syntax);
               printf( "%s\n", line );
       }

       The following sample resolver performs very  limited  checking  on  the
       operand of a jump or call to determine what program address the operand
       refers to:

       /* Sample DISASM_RESOLVER: computes the program address that the
          operand of a jump or call refers to.
            op   - operand to resolve
            insn - instruction containing op
            arg  - user argument required by the DISASM_RESOLVER signature;
                   not used here
          Returns the target address, or -1 when the operand type or
          datatype is not one of the handled cases. */
       long resolver( x86_op_t *op, x86_insn_t *insn, void *arg ) {
               long retval = -1;
               (void) arg;
               /* this is a flat ripoff of internal_resolver in libdis.c --
                  we don't do any register or stack resolving, or check
                  to see if we have already encountered this RVA */
               if ( op->type == op_absolute || op->type == op_offset ) {
                       retval = op->data.sdword;
               } else if ( op->type == op_relative_near ||
                           op->type == op_relative_far ) {
                       /* relative operands are offsets from the end of
                          this insn: addr + size + signed displacement */
                       if ( op->datatype == op_byte ) {
                               retval = insn->addr + insn->size +
                                        op->data.sbyte;
                       } else if ( op->datatype == op_word ) {
                               retval = insn->addr + insn->size +
                                        op->data.sword;
                       } else if ( op->datatype == op_dword ) {
                               retval = insn->addr + insn->size +
                                        op->data.sdword;
                       }
               }
               return( retval );
       }

       The  following  code  snippets  demonstrate  how  to  use  the  various
       disassembly routines:

               unsigned char *buf;    /* buffer of bytes to disassemble */
               unsigned int buf_len;  /* length of buffer */
               unsigned long buf_rva; /* load address of start of buffer */
               unsigned int pos;      /* position in buffer */
               x86_insn_t insn;       /* disassembled instruction */

               /* disassemble entire buffer, printing automatically */
               x86_disasm_range( buf, buf_rva, pos, buf_len,
                                 raw_print, NULL );

               /* disassemble a single instruction, then print it */
               if (x86_disasm( buf, buf_len, buf_rva, pos, &insn ) ) {
                       raw_print( &insn, NULL );
               }

               /* disassemble forward in 'buf' starting at 'pos'; the two
                  NULLs fill the arg and r_arg parameters of the prototype */
               x86_disasm_forward( buf, buf_len, buf_rva, pos,
                                   raw_print, NULL, resolver, NULL );

SEE ALSO

       libdisasm(7), x86_format_insn(3), x86_init(3), x86dis(1)