From 955abacfb1a11549b0979ee9d5c54ae474c74201 Mon Sep 17 00:00:00 2001 From: Pierre Pronchery Date: Fri, 29 Apr 2011 05:51:36 +0000 Subject: [PATCH] Exposing the Code class to both Arch and Format to avoid defining callbacks --- include/Asm/arch.h | 5 ++ include/Asm/asm.h | 9 +-- include/Asm/format.h | 4 ++ src/Makefile | 14 ++--- src/arch.c | 45 +++++++++++--- src/arch.h | 6 +- src/arch/dalvik.c | 19 +++++- src/arch/dalvik.ins | 5 +- src/code.c | 141 ++++++++++++++++++++++++++----------------- src/code.h | 15 +++-- src/format.c | 44 ++++++++------ src/format.h | 7 ++- src/format/dex.c | 87 ++++++++++++++++++++++++-- src/main.c | 1 - src/parser.c | 1 + src/project.conf | 14 ++--- 16 files changed, 302 insertions(+), 115 deletions(-) diff --git a/include/Asm/arch.h b/include/Asm/arch.h index 7aa1da7..b81e36c 100644 --- a/include/Asm/arch.h +++ b/include/Asm/arch.h @@ -20,6 +20,7 @@ # include # include +# include "asm.h" /* AsmArch */ @@ -76,6 +77,7 @@ typedef enum _ArchOperandType /* immediate refers */ # define AOI_REFERS_STRING 0x1 +# define AOI_REFERS_FUNCTION 0x2 /* macros */ # define AO_GET_FLAGS(operand) ((operand & AOM_FLAGS) >> AOD_FLAGS) @@ -136,6 +138,7 @@ typedef struct _ArchOperand /* AOT_IMMEDIATE */ struct { + char const * name; /* optional */ uint64_t value; int negative; } immediate; @@ -187,12 +190,14 @@ typedef struct _ArchPluginHelper /* callbacks */ /* accessors */ char const * (*get_filename)(Arch * arch); + AsmFunction * (*get_function_by_id)(Arch * arch, AsmId id); ArchInstruction * (*get_instruction_by_opcode)(Arch * arch, uint8_t size, uint32_t opcode); ArchRegister * (*get_register_by_id_size)(Arch * arch, uint32_t id, uint32_t size); ArchRegister * (*get_register_by_name_size)(Arch * arch, char const * name, uint32_t size); + AsmString * (*get_string_by_id)(Arch * arch, AsmId id); /* assembly */ ssize_t (*write)(Arch * arch, void const * buf, size_t size); diff --git a/include/Asm/asm.h b/include/Asm/asm.h index d9c4627..c54c426 100644 --- 
a/include/Asm/asm.h +++ b/include/Asm/asm.h @@ -18,7 +18,7 @@ #ifndef DEVEL_ASM_ASM_H # define DEVEL_ASM_ASM_H -# include +# include /* Asm */ @@ -43,9 +43,10 @@ typedef struct _AsmLabel typedef struct _AsmString { - AsmId id; - char const * string; - ssize_t size; + int id; + char const * name; + off_t offset; + ssize_t length; } AsmString; typedef enum _AsmPluginType { APT_ARCH = 0, APT_FORMAT } AsmPluginType; diff --git a/include/Asm/format.h b/include/Asm/format.h index d8a4d75..54f5327 100644 --- a/include/Asm/format.h +++ b/include/Asm/format.h @@ -19,6 +19,7 @@ # define DEVEL_ASM_FORMAT_H # include +# include "asm.h" /* AsmFormat */ @@ -44,6 +45,9 @@ typedef struct _FormatPluginHelper /* disassembly */ /* FIXME let a different architecture be specified in the callback */ + AsmString * (*get_string_by_id)(Format * format, AsmId id); + int (*set_function)(Format * format, int id, char const * name, + off_t offset, ssize_t size); int (*set_string)(Format * format, int id, char const * name, off_t offset, ssize_t size); int (*decode)(Format * format, char const * section, diff --git a/src/Makefile b/src/Makefile index b1dc051..8aabd78 100644 --- a/src/Makefile +++ b/src/Makefile @@ -54,28 +54,28 @@ deasm_LDFLAGS = $(LDFLAGSF) $(LDFLAGS) -L. 
-Wl,-rpath,$(LIBDIR) -lasm deasm: $(deasm_OBJS) libasm.so $(CC) -o deasm $(deasm_OBJS) $(deasm_LDFLAGS) -arch.o: arch.c arch.h ../config.h +arch.o: arch.c ../include/Asm/arch.h ../include/Asm/asm.h arch.h code.h ../config.h $(CC) $(libasm_CFLAGS) -c arch.c asm.o: asm.c ../include/Asm.h code.h parser.h ../config.h $(CC) $(libasm_CFLAGS) -c asm.c -code.o: code.c ../include/Asm.h arch.h code.h format.h +code.o: code.c ../include/Asm.h ../include/Asm/arch.h ../include/Asm/asm.h ../include/Asm/format.h arch.h code.h format.h $(CC) $(libasm_CFLAGS) -c code.c -format.o: format.c format.h ../config.h +format.o: format.c ../include/Asm/asm.h ../include/Asm/format.h code.h format.h ../config.h $(CC) $(libasm_CFLAGS) -c format.c -parser.o: parser.c parser.h +parser.o: parser.c parser.h token.h $(CC) $(libasm_CFLAGS) -c parser.c -token.o: token.c token.h +token.o: token.c common.h token.h $(CC) $(libasm_CFLAGS) -c token.c -main.o: main.c ../include/Asm.h +main.o: main.c ../include/Asm/asm.h ../config.h $(CC) $(asm_CFLAGS) -c main.c -deasm.o: deasm.c ../include/Asm.h +deasm.o: deasm.c ../include/Asm.h ../include/Asm/arch.h ../include/Asm/asm.h ../include/Asm/format.h ../config.h $(CC) $(deasm_CFLAGS) -c deasm.c clean: diff --git a/src/arch.c b/src/arch.c index 9110d99..5233143 100644 --- a/src/arch.c +++ b/src/arch.c @@ -46,6 +46,7 @@ struct _Arch size_t registers_cnt; /* internal */ + Code * code; off_t base; char const * filename; FILE * fp; @@ -64,6 +65,8 @@ struct _Arch /* prototypes */ /* callbacks */ static char const * _arch_get_filename(Arch * arch); +static AsmFunction * _arch_get_function_by_id(Arch * arch, AsmId id); +static AsmString * _arch_get_string_by_id(Arch * arch, AsmId id); static ssize_t _arch_read(Arch * arch, void * buf, size_t size); static ssize_t _arch_read_buffer(Arch * arch, void * buf, size_t size); static off_t _arch_seek(Arch * arch, off_t offset, int whence); @@ -431,8 +434,10 @@ ArchRegister * arch_get_register_by_name_size(Arch * arch, char 
const * name, /* useful */ /* arch_decode */ -int arch_decode(Arch * arch, ArchInstructionCall ** calls, size_t * calls_cnt) +int arch_decode(Arch * arch, Code * code, ArchInstructionCall ** calls, + size_t * calls_cnt) { + int ret = 0; ArchInstructionCall * c = NULL; size_t c_cnt = 0; ArchInstructionCall * p; @@ -440,12 +445,14 @@ int arch_decode(Arch * arch, ArchInstructionCall ** calls, size_t * calls_cnt) if(arch->plugin->decode == NULL) return -error_set_code(1, "%s: %s", arch->plugin->name, "Disassembly not supported"); + arch->code = code; for(;;) { if((p = realloc(c, sizeof(*c) * (c_cnt + 1))) == NULL) { free(c); - return -error_set_code(1, "%s", strerror(errno)); + ret = -error_set_code(1, "%s", strerror(errno)); + break; } c = p; p = &c[c_cnt]; @@ -457,14 +464,18 @@ int arch_decode(Arch * arch, ArchInstructionCall ** calls, size_t * calls_cnt) p->size = arch->buffer_pos - p->offset; c_cnt++; } - *calls = c; - *calls_cnt = c_cnt; - return 0; + if(ret == 0) + { + *calls = c; + *calls_cnt = c_cnt; + } + arch->code = NULL; + return ret; } /* arch_decode_at */ -int arch_decode_at(Arch * arch, ArchInstructionCall ** calls, +int arch_decode_at(Arch * arch, Code * code, ArchInstructionCall ** calls, size_t * calls_cnt, off_t offset, size_t size, off_t base) { int ret; @@ -476,10 +487,10 @@ int arch_decode_at(Arch * arch, ArchInstructionCall ** calls, return -error_set_code(1, "%s", strerror(errno)); if(size == 0) return 0; - arch->base = base; + arch->code = code; arch->buffer_pos = offset; arch->buffer_cnt = offset + size; - if((ret = arch_decode(arch, calls, calls_cnt)) == 0 + if((ret = arch_decode(arch, code, calls, calls_cnt)) == 0 && fseek(arch->fp, offset + size, SEEK_SET) != 0) { free(*calls); /* XXX the pointer was updated anyway... 
*/ @@ -520,9 +531,11 @@ int arch_init(Arch * arch, char const * filename, FILE * fp) arch->buffer_pos = 0; /* XXX used as offset */ arch->helper.arch = arch; arch->helper.get_filename = _arch_get_filename; + arch->helper.get_function_by_id = _arch_get_function_by_id; arch->helper.get_instruction_by_opcode = arch_get_instruction_by_opcode; arch->helper.get_register_by_id_size = arch_get_register_by_id_size; arch->helper.get_register_by_name_size = arch_get_register_by_name_size; + arch->helper.get_string_by_id = _arch_get_string_by_id; arch->helper.read = _arch_read; arch->helper.seek = _arch_seek; arch->helper.write = _arch_write; @@ -545,9 +558,11 @@ int arch_init_buffer(Arch * arch, char const * buffer, size_t size) arch->buffer_pos = 0; arch->helper.arch = arch; arch->helper.get_filename = _arch_get_filename; + arch->helper.get_function_by_id = _arch_get_function_by_id; arch->helper.get_instruction_by_opcode = arch_get_instruction_by_opcode; arch->helper.get_register_by_id_size = arch_get_register_by_id_size; arch->helper.get_register_by_name_size = arch_get_register_by_name_size; + arch->helper.get_string_by_id = _arch_get_string_by_id; arch->helper.write = NULL; arch->helper.read = _arch_read_buffer; arch->helper.seek = _arch_seek_buffer; @@ -594,6 +609,20 @@ static char const * _arch_get_filename(Arch * arch) } +/* arch_get_function_by_id */ +static AsmFunction * _arch_get_function_by_id(Arch * arch, AsmId id) +{ + return code_get_function_by_id(arch->code, id); +} + + +/* arch_get_string_by_id */ +static AsmString * _arch_get_string_by_id(Arch * arch, AsmId id) +{ + return code_get_string_by_id(arch->code, id); +} + + /* arch_read */ static ssize_t _arch_read(Arch * arch, void * buf, size_t size) { diff --git a/src/arch.h b/src/arch.h index 55589c6..4632d0c 100644 --- a/src/arch.h +++ b/src/arch.h @@ -21,6 +21,7 @@ # include # include # include "Asm/arch.h" +# include "code.h" /* Arch */ @@ -59,8 +60,9 @@ int arch_write(Arch * arch, ArchInstruction * 
instruction, ArchInstructionCall * call); /* disassembly */ -int arch_decode(Arch * arch, ArchInstructionCall ** calls, size_t * calls_cnt); -int arch_decode_at(Arch * arch, ArchInstructionCall ** calls, +int arch_decode(Arch * arch, Code * code, ArchInstructionCall ** calls, + size_t * calls_cnt); +int arch_decode_at(Arch * arch, Code * code, ArchInstructionCall ** calls, size_t * calls_cnt, off_t offset, size_t size, off_t base); ssize_t arch_read(Arch * arch, void * buf, size_t cnt); off_t arch_seek(Arch * arch, off_t offset, int whence); diff --git a/src/arch/dalvik.c b/src/arch/dalvik.c index 9789037..ce65342 100644 --- a/src/arch/dalvik.c +++ b/src/arch/dalvik.c @@ -198,8 +198,10 @@ static int _decode_immediate(DalvikDecode * dd, size_t i) uint8_t u8; uint16_t u16; uint32_t u32; + AsmFunction * af; + AsmString * as; - switch(AO_GET_SIZE(dd->call->operands[i].type)) + switch(AO_GET_SIZE(ao->type)) { case 4: if(dd->u8 >= 0) @@ -236,6 +238,21 @@ static int _decode_immediate(DalvikDecode * dd, size_t i) return -error_set_code(1, "%s", "Unsupported immediate" " operand"); } + switch(AO_GET_VALUE(ao->type)) + { + case AOI_REFERS_FUNCTION: + af = helper->get_function_by_id(helper->arch, + ao->value.immediate.value); + if(af != NULL) + ao->value.immediate.name = af->name; + break; + case AOI_REFERS_STRING: + as = helper->get_string_by_id(helper->arch, + ao->value.immediate.value); + if(as != NULL) + ao->value.immediate.name = as->name; + break; + } ao->value.immediate.negative = 0; return 0; } diff --git a/src/arch/dalvik.ins b/src/arch/dalvik.ins index 1cda41e..fa1bb09 100644 --- a/src/arch/dalvik.ins +++ b/src/arch/dalvik.ins @@ -33,6 +33,7 @@ #define OP_U8 AO_IMMEDIATE(0, 8, 0) #define OP_U16 AO_IMMEDIATE(0, 16, 0) #define OP_U16_STR AO_IMMEDIATE(0, 16, AOI_REFERS_STRING) +#define OP_U16_FUNC AO_IMMEDIATE(0, 16, AOI_REFERS_FUNCTION) #define OP_U32 AO_IMMEDIATE(0, 32, 0) #define OP_U64 AO_IMMEDIATE(0, 64, 0) @@ -96,6 +97,7 @@ { "double-to-float", 0x8c, OP1F, 
OP_REG4, OP_REG4, AOT_NONE }, { "double-to-int", 0x8a, OP1F, OP_REG4, OP_REG4, AOT_NONE }, { "double-to-long", 0x8b, OP1F, OP_REG4, OP_REG4, AOT_NONE }, +{ "execute-inline", 0xee, OP1F, OP_U8, OP_U16, OP_U16 }, { "fill-array-data", 0x26, OP1F, OP_REG8, OP_U32, AOT_NONE }, { "filled-new-array", 0x24, OP1F, OP_REG8, OP_U32, AOT_NONE }, { "filled-new-array-range", @@ -129,7 +131,8 @@ { "int-to-float", 0x82, OP1F, OP_REG4, OP_REG4, AOT_NONE }, { "int-to-long", 0x81, OP1F, OP_REG4, OP_REG4, AOT_NONE }, { "int-to-short", 0x8f, OP1F, OP_REG4, OP_REG4, AOT_NONE }, -{ "invoke-direct", 0x70, OP1F, OP_U8, OP_U16, OP_U16 }, +{ "invoke-direct", 0x70, OP1F, OP_U8, OP_U16_FUNC, OP_U16 }, +{ "invoke-direct-empty",0xf0, OP1F, OP_U8, OP_U16_FUNC, OP_U16 }, { "invoke-direct/range",0x76, OP1F, OP_U8, OP_U16, OP_U16 }, { "invoke-interface", 0x72, OP1F, OP_U8, OP_U16, OP_U16 }, { "invoke-interface-range", diff --git a/src/code.c b/src/code.c index 3f987aa..05a3631 100644 --- a/src/code.c +++ b/src/code.c @@ -25,7 +25,6 @@ #include #include "arch.h" #include "format.h" -#include "common.h" #include "code.h" @@ -129,6 +128,47 @@ char const * code_get_format(Code * code) } +/* code_get_function_by_id */ +AsmFunction * code_get_function_by_id(Code * code, AsmId id) +{ + /* FIXME implement */ + return NULL; +} + + +/* code_get_string_by_id */ +AsmString * code_get_string_by_id(Code * code, AsmId id) +{ + /* XXX CodeString has to be exactly like an AsmString */ + return _code_string_get_by_id(code, id); +} + + +/* code_set_function */ +int code_set_function(Code * code, int id, char const * name, off_t offset, + ssize_t size) +{ + /* FIXME implement */ + return -1; +} + + +/* code_set_string */ +int code_set_string(Code * code, int id, char const * name, off_t offset, + ssize_t length) +{ + CodeString * cs = NULL; + + if(id >= 0) + cs = _code_string_get_by_id(code, id); + if(cs == NULL) + cs = _code_string_append(code); + if(cs == NULL || _code_string_set(cs, id, name, offset, length) != 0) + 
return -1; + return cs->id; +} + + /* useful */ /* code_close */ int code_close(Code * code) @@ -158,7 +198,7 @@ int code_decode(Code * code, char const * buffer, size_t size) size_t i; arch_init_buffer(code->arch, buffer, size); - if((ret = arch_decode(code->arch, &calls, &calls_cnt)) == 0) + if((ret = arch_decode(code->arch, code, &calls, &calls_cnt)) == 0) { fprintf(stderr, "DEBUG: %lu\n", calls_cnt); for(i = 0; i < calls_cnt; i++) @@ -226,59 +266,37 @@ static int _decode_print(Code * code, ArchInstructionCall * call) static void _decode_print_immediate(Code * code, ArchOperand * ao) { - CodeString * cs; - printf("%s$0x%lx", ao->value.immediate.negative ? "-" : "", ao->value.immediate.value); if(AO_GET_VALUE(ao->type) == AOI_REFERS_STRING) { - cs = _code_string_get_by_id(code, ao->value.immediate.value); - if(cs != NULL && cs->name == NULL) - _code_string_read(code, cs); - if(cs != NULL && cs->name != NULL) - printf(" \"%s\"", cs->name); + if(ao->value.immediate.name != NULL) + printf(" \"%s\"", ao->value.immediate.name); else printf("%s", " (string)"); } + else if(AO_GET_VALUE(ao->type) == AOI_REFERS_FUNCTION) + { + if(ao->value.immediate.name != NULL) + printf(" call \"%s\"", ao->value.immediate.name); + else + printf("%s", " (call)"); + } } -/* code_decode_file */ -static int _decode_file_callback(void * priv, char const * section, - off_t offset, size_t size, off_t base); -static int _set_string_callback(void * priv, int id, char const * name, - off_t offset, ssize_t size); - -int code_decode_file(Code * code, char const * filename) +/* code_decode_at */ +int code_decode_at(Code * code, char const * section, off_t offset, + size_t size, off_t base) { - int ret; - FILE * fp; - - if((fp = fopen(filename, "r")) == NULL) - return -error_set_code(1, "%s: %s", filename, strerror(errno)); - arch_init(code->arch, filename, fp); - format_init(code->format, filename, fp); - ret = format_decode(code->format, _set_string_callback, - _decode_file_callback, code); - 
format_exit(code->format); - arch_exit(code->arch); - if(fclose(fp) != 0 && ret == 0) - ret = -error_set_code(1, "%s: %s", filename, strerror(errno)); - return ret; -} - -static int _decode_file_callback(void * priv, char const * section, - off_t offset, size_t size, off_t base) -{ - Code * code = priv; ArchInstructionCall * calls = NULL; size_t calls_cnt = 0; size_t i; if(section != NULL) printf("%s%s:\n", "\nDisassembly of section ", section); - if(arch_decode_at(code->arch, &calls, &calls_cnt, offset, size, base) - != 0) + if(arch_decode_at(code->arch, code, &calls, &calls_cnt, offset, size, + base) != 0) return -1; if(size != 0) printf("\n%08lx:\n", (long)offset + (long)base); @@ -288,19 +306,23 @@ static int _decode_file_callback(void * priv, char const * section, return 0; } -static int _set_string_callback(void * priv, int id, char const * name, - off_t offset, ssize_t length) -{ - Code * code = priv; - CodeString * cs = NULL; - if(id >= 0) - cs = _code_string_get_by_id(code, id); - if(cs == NULL) - cs = _code_string_append(code); - if(cs == NULL || _code_string_set(cs, id, name, offset, length) != 0) - return -1; - return cs->id; +/* code_decode_file */ +int code_decode_file(Code * code, char const * filename) +{ + int ret; + FILE * fp; + + if((fp = fopen(filename, "r")) == NULL) + return -error_set_code(1, "%s: %s", filename, strerror(errno)); + arch_init(code->arch, filename, fp); + format_init(code->format, filename, fp); + ret = format_decode(code->format, code); + format_exit(code->format); + arch_exit(code->arch); + if(fclose(fp) != 0 && ret == 0) + ret = -error_set_code(1, "%s: %s", filename, strerror(errno)); + return ret; } @@ -382,9 +404,13 @@ static CodeString * _code_string_get_by_id(Code * code, AsmId id) size_t i; for(i = 0; i < code->strings_cnt; i++) - if(code->strings[i].id >= 0 && code->strings[i].id == id) - return &code->strings[i]; - return NULL; + if(code->strings[i].id >= 0 && (AsmId)code->strings[i].id == id) + break; + if(i == 
code->strings_cnt) + return NULL; + if(code->strings[i].name == NULL) + _code_string_read(code, &code->strings[i]); + return &code->strings[i]; } @@ -429,23 +455,28 @@ static CodeString * _code_string_append(Code * code) /* code_string_read */ static int _code_string_read(Code * code, CodeString * codestring) { + off_t offset; /* XXX should not have to be kept */ char * buf; if(codestring->offset < 0 || codestring->length < 0) return -error_set_code(1, "%s", "Insufficient information to" " read string"); - if(arch_seek(code->arch, codestring->offset, SEEK_SET) - != codestring->offset) + if((offset = arch_seek(code->arch, 0, SEEK_CUR)) < 0) return -1; if((buf = malloc(codestring->length + 1)) == NULL) return -error_set_code(1, "%s", strerror(errno)); + if(arch_seek(code->arch, codestring->offset, SEEK_SET) + != codestring->offset) + return -1; if(arch_read(code->arch, buf, codestring->length) != codestring->length) { free(buf); + arch_seek(code->arch, offset, SEEK_SET); return -1; } buf[codestring->length] = '\0'; free(codestring->name); codestring->name = buf; + arch_seek(code->arch, offset, SEEK_SET); return 0; } diff --git a/src/code.h b/src/code.h index 8556d66..e8c75a3 100644 --- a/src/code.h +++ b/src/code.h @@ -19,10 +19,7 @@ # define ASM_CODE_H # include -# include "Asm/asm.h" -# include "arch.h" -# include "format.h" -# include "token.h" +# include "Asm/arch.h" /* types */ @@ -38,6 +35,14 @@ char const * code_get_arch(Code * code); char const * code_get_filename(Code * code); char const * code_get_format(Code * code); +AsmFunction * code_get_function_by_id(Code * code, AsmId id); +AsmString * code_get_string_by_id(Code * code, AsmId id); + +int code_set_function(Code * code, int id, char const * name, off_t offset, + ssize_t size); +int code_set_string(Code * code, int id, char const * name, off_t offset, + ssize_t length); + /* useful */ /* common */ int code_open(Code * code, char const * filename); @@ -50,6 +55,8 @@ int code_section(Code * code, char 
const * section); /* disassembly */ int code_decode(Code * code, char const * buffer, size_t size); +int code_decode_at(Code * code, char const * section, off_t offset, + size_t size, off_t base); int code_decode_file(Code * code, char const * filename); #endif /* !ASM_CODE_H */ diff --git a/src/format.c b/src/format.c index a7f3521..7f7176f 100644 --- a/src/format.c +++ b/src/format.c @@ -40,11 +40,8 @@ struct _Format char const * filename; FILE * fp; - /* diassembly */ - /* callbacks */ - FormatSetStringCallback callback_set_string; - FormatDecodeCallback callback_decode; - void * callback_priv; + /* deassembly */ + Code * code; }; @@ -114,28 +111,30 @@ char const * format_get_name(Format * format) /* format_decode */ static int _decode_callback(Format * format, char const * section, off_t offset, size_t size, off_t base); +static AsmString * _get_string_by_id_callback(Format * format, AsmId id); +static int _set_function_callback(Format * format, int id, char const * name, + off_t offset, ssize_t size); static int _set_string_callback(Format * format, int id, char const * name, off_t offset, ssize_t size); -int format_decode(Format * format, FormatSetStringCallback set_string, - FormatDecodeCallback decode, void * priv) +int format_decode(Format * format, Code * code) { int ret; if(format->plugin->decode == NULL) return error_set_code(1, "%s: %s", format_get_name(format), "Disassembly is not supported"); - format->helper.set_string = _set_string_callback; - format->callback_set_string = set_string; format->helper.decode = _decode_callback; - format->callback_decode = decode; - format->callback_priv = priv; + format->helper.get_string_by_id = _get_string_by_id_callback; + format->helper.set_function = _set_function_callback; + format->helper.set_string = _set_string_callback; + format->code = code; ret = format->plugin->decode(format->plugin); + format->code = NULL; format->helper.set_string = NULL; - format->callback_set_string = NULL; + 
format->helper.set_function = NULL; + format->helper.get_string_by_id = NULL; format->helper.decode = NULL; - format->callback_decode = NULL; - format->callback_priv = NULL; return ret; } @@ -146,15 +145,24 @@ static int _decode_callback(Format * format, char const * section, fprintf(stderr, "DEBUG: %s(\"%s\", 0x%lx, 0x%lx, 0x%lx)\n", __func__, section, offset, size, base); #endif - return format->callback_decode(format->callback_priv, section, offset, - size, base); + return code_decode_at(format->code, section, offset, size, base); +} + +static AsmString * _get_string_by_id_callback(Format * format, AsmId id) +{ + return code_get_string_by_id(format->code, id); +} + +static int _set_function_callback(Format * format, int id, char const * name, + off_t offset, ssize_t size) +{ + return code_set_function(format->code, id, name, offset, size); } static int _set_string_callback(Format * format, int id, char const * name, off_t offset, ssize_t size) { - return format->callback_set_string(format->callback_priv, id, name, - offset, size); + return code_set_string(format->code, id, name, offset, size); } diff --git a/src/format.h b/src/format.h index 4d8234e..2c7a3db 100644 --- a/src/format.h +++ b/src/format.h @@ -19,6 +19,7 @@ # define ASM_FORMAT_H # include "Asm/format.h" +# include "code.h" /* Format */ @@ -26,6 +27,9 @@ /* types */ typedef int (*FormatDecodeCallback)(void * priv, char const * section, off_t offset, size_t size, off_t base); +typedef AsmString * (*FormatGetStringByIdCallback)(void * priv, AsmId id); +typedef int (*FormatSetFunctionCallback)(void * priv, int id, char const * name, + off_t offset, ssize_t length); typedef int (*FormatSetStringCallback)(void * priv, int id, char const * name, off_t offset, ssize_t length); @@ -45,7 +49,6 @@ int format_function(Format * format, char const * function); int format_section(Format * format, char const * section); /* disassembly */ -int format_decode(Format * format, FormatSetStringCallback set_string, - 
FormatDecodeCallback decode, void * priv); +int format_decode(Format * format, Code * code); #endif /* !ASM_FORMAT_H */ diff --git a/src/format/dex.c b/src/format/dex.c index e61c505..96dffcd 100644 --- a/src/format/dex.c +++ b/src/format/dex.c @@ -59,6 +59,7 @@ enum { TYPE_HEADER_ITEM = 0x0000, TYPE_STRING_ID_ITEM = 0x0001, + TYPE_METHOD_ID_ITEM = 0x0005, TYPE_CODE_ITEM = 0x2001, TYPE_STRING_DATA_ITEM = 0x2002 }; @@ -89,6 +90,13 @@ typedef struct _DexMapTryItem uint16_t handler_off; } DexMapTryItem; +typedef struct _DexMethodIdItem +{ + uint16_t class_idx; + uint16_t proto_idx; + uint32_t name_idx; +} DexMethodIdItem; + typedef struct _DexStringIdItem { uint32_t string_data_off; @@ -101,6 +109,12 @@ typedef struct _DexString char * string; } DexString; +typedef struct _Dex +{ + DexMethodIdItem * dmii; + size_t dmii_cnt; +} Dex; + /* variables */ static char _dex_signature[4] = "dex\n"; @@ -109,6 +123,7 @@ static char _dex_signature[4] = "dex\n"; /* prototypes */ /* plug-in */ static int _dex_init(FormatPlugin * format, char const * arch); +static int _dex_exit(FormatPlugin * format); static char const * _dex_detect(FormatPlugin * format); static int _dex_decode(FormatPlugin * format); @@ -122,7 +137,7 @@ FormatPlugin format_plugin = _dex_signature, sizeof(_dex_signature), _dex_init, - NULL, + _dex_exit, NULL, NULL, _dex_detect, @@ -137,12 +152,30 @@ FormatPlugin format_plugin = /* dex_init */ static int _dex_init(FormatPlugin * format, char const * arch) { + Dex * dex; + #ifdef DEBUG fprintf(stderr, "DEBUG: %s(\"%s\")\n", __func__, arch); #endif if(arch != NULL && strcmp(arch, "dalvik") != 0) return -error_set_code(1, "%s: %s", arch, "Unsupported Dex architecture"); + if((dex = object_new(sizeof(*dex))) == NULL) + return -1; + format->priv = dex; + dex->dmii = NULL; + dex->dmii_cnt = 0; + return 0; +} + + +/* dex_exit */ +static int _dex_exit(FormatPlugin * format) +{ + Dex * dex = format->priv; + + free(dex->dmii); + free(dex); return 0; } @@ -158,6 +191,8 @@ 
static char const * _dex_detect(FormatPlugin * format) /* dex_decode */ static int _decode_map(FormatPlugin * format, DexHeader * dh); static int _decode_map_code(FormatPlugin * format, off_t offset, size_t size); +static int _decode_map_method_id(FormatPlugin * format, off_t offset, + size_t size); static int _decode_map_string_id(FormatPlugin * format, off_t offset, size_t size); @@ -215,9 +250,14 @@ static int _decode_map(FormatPlugin * format, DexHeader * dh) ret |= _decode_map_code(format, dmi.offset, dmi.size); break; + case TYPE_METHOD_ID_ITEM: + ret |= _decode_map_method_id(format, dmi.offset, + dmi.size); + break; case TYPE_STRING_ID_ITEM: ret |= _decode_map_string_id(format, dmi.offset, dmi.size); + break; } if(helper->seek(helper->format, offset, SEEK_SET) != offset) return -1; @@ -286,13 +326,49 @@ static int _decode_map_code(FormatPlugin * format, off_t offset, size_t size) return 0; } +static int _decode_map_method_id(FormatPlugin * format, off_t offset, + size_t size) +{ + FormatPluginHelper * helper = format->helper; + Dex * dex = format->priv; + ssize_t s; + size_t i; + AsmString * string; + char const * name; + + if(dex->dmii != NULL) + return 0; /* already parsed */ + if(helper->seek(helper->format, offset, SEEK_SET) != offset) + return -1; + s = sizeof(*dex->dmii) * size; + if((dex->dmii = malloc(s)) == NULL) + return -error_set_code(1, "%s", strerror(errno)); + if(helper->read(helper->format, dex->dmii, s) != s) + return -1; + for(i = 0; i < size; i++) + { + dex->dmii[i].class_idx = _htol16(dex->dmii[i].class_idx); + dex->dmii[i].proto_idx = _htol16(dex->dmii[i].proto_idx); + dex->dmii[i].name_idx = _htol32(dex->dmii[i].name_idx); + if((string = helper->get_string_by_id(helper->format, + dex->dmii[i].name_idx)) != NULL) + name = string->name; + else + /* XXX report error? 
*/ + name = NULL; + helper->set_function(helper->format, i, name, -1, -1); + } + dex->dmii_cnt = size; + return 0; +} + static int _decode_map_string_id(FormatPlugin * format, off_t offset, size_t size) { FormatPluginHelper * helper = format->helper; - size_t i; DexStringIdItem * dsii; ssize_t s; + size_t i; uint8_t u8; if(helper->seek(helper->format, offset, SEEK_SET) != offset) @@ -307,14 +383,15 @@ static int _decode_map_string_id(FormatPlugin * format, off_t offset, dsii[i].string_data_off = _htol32(dsii[i].string_data_off); offset = dsii[i].string_data_off; if(helper->seek(helper->format, offset, SEEK_SET) != offset) - return -1; + break; if(helper->read(helper->format, &u8, sizeof(u8)) != sizeof(u8)) - return -1; + break; #ifdef DEBUG fprintf(stderr, "DEBUG: %s() string %lu offset 0x%x len %u\n", __func__, i, offset, u8); #endif helper->set_string(helper->format, i, NULL, offset + 1, u8); } - return 0; + free(dsii); + return (i == size) ? 0 : -1; } diff --git a/src/main.c b/src/main.c index f2dc5e2..6f3f701 100644 --- a/src/main.c +++ b/src/main.c @@ -19,7 +19,6 @@ #include #include #include "Asm/asm.h" -#include "common.h" #include "../config.h" diff --git a/src/parser.c b/src/parser.c index 877a583..efdd6f2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -22,6 +22,7 @@ #include #include #include "common.h" +#include "token.h" #include "parser.h" diff --git a/src/project.conf b/src/project.conf index 8d88436..195e91f 100644 --- a/src/project.conf +++ b/src/project.conf @@ -34,28 +34,28 @@ ldflags=-L. 
-Wl,-rpath,$(LIBDIR) -lasm install=$(BINDIR) [arch.c] -depends=arch.h,../config.h +depends=../include/Asm/arch.h,../include/Asm/asm.h,arch.h,code.h,../config.h [asm.c] depends=../include/Asm.h,code.h,parser.h,../config.h [code.c] -depends=../include/Asm.h,arch.h,code.h,format.h +depends=../include/Asm.h,../include/Asm/arch.h,../include/Asm/asm.h,../include/Asm/format.h,arch.h,code.h,format.h [deasm.c] -depends=../include/Asm.h +depends=../include/Asm.h,../include/Asm/arch.h,../include/Asm/asm.h,../include/Asm/format.h,../config.h [format.c] -depends=format.h,../config.h +depends=../include/Asm/asm.h,../include/Asm/format.h,code.h,format.h,../config.h [main.c] -depends=../include/Asm.h +depends=../include/Asm/asm.h,../config.h [parser.c] -depends=parser.h +depends=parser.h,token.h [scanner.c] depends=../include/Asm.h,token.h [token.c] -depends=token.h +depends=common.h,token.h