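// obj2asm
// (Scraped from a web page. Source: Internet; editor: 程序博客网; time: 2024/05/16 14:50)
// The goal of this tool is to convert a COFF obj file into asm source that can then be re-assembled into an obj with MASM.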
/*
written by dummyz@126.com 2007/02/24
*/
#include <list>
#include <vector>
#include <algorithm>
#include <string>
#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstdarg>
#include <cstring>
#include <ctime>
#include <cassert>
#include <windows.h> // for suppert coff file format
#include "udis86.h"
using namespace std;
// Size in bytes (5 * 1024 * 1024) of the scratch text buffer that
// output_segment allocates for each section it processes.
#define FILE_BUF_SIZE (1024 * 1024 * 5)
// Logging entry point used by this file; currently an alias for logout().
#define LOGOUT logout
// printf-style logger: forwards the format string and its variadic
// arguments to vprintf, i.e. writes the formatted text to stdout.
static void logout(const char* format, ...)
{
    va_list args;

    va_start(args, format);
    vprintf(format, args);
    va_end(args);
}
//////////////////////////////////////////////////////////////////////////
// 判断是否是有效的 obj
static bool check_obj(const void* obj_data)
{
PIMAGE_FILE_HEADER pfh = (PIMAGE_FILE_HEADER)obj_data;
return (
pfh->Machine == IMAGE_FILE_MACHINE_I386 &&
pfh->NumberOfSections != 0 &&
pfh->SizeOfOptionalHeader == 0
);
}
// Section names are sanitized with exactly the same rules as symbol names.
#define to_sectname(a) to_symname(a)
// Sanitizes a NUL-terminated name IN PLACE and returns the same pointer.
//   - a leading decimal digit is replaced by '_';
//   - every other character that is not alphanumeric and not one of
//     '_', '@', '?', '$' is replaced by '_'.
// The length of the string never changes.
static const char* to_symname(char* symname)
{
    char* p = symname;

    // <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF); a plain char may be negative, so convert before the call.
    if ( isdigit((unsigned char)*p) )
        *p++ = '_';

    for ( ; *p != 0; p++ )
    {
        const unsigned char c = (unsigned char)*p;
        if ( c != '_' && c != '@' && c != '?' && c != '$' && !isalnum(c) )
            *p = '_';
    }
    return symname;
}
// Writes the banner comment (MASM ';' comment lines) that heads the
// generated file, followed by a line holding the current local time as
// formatted by ctime().
static void output_copyright(std::ostream& stream)
{
    std::time_t now;
    std::time(&now);

    stream << ";---------------------------------------------------\n"
              "; This file is generated by the obj2asm\n"
              "; author: songlei\n"
              "; e-mail: dummyz@126.com\n"
              "; date: 5/2/2007\n"
              ";----------------------------------------------------\n"
              // ctime() text already ends with '\n'; the extra '\n' leaves a
              // blank line after the banner.
              "; " << std::ctime(&now) << '\n';
}
// Writes the fixed directive prologue of the generated file: one directive
// per line, followed by an empty line.
static void output_asm_begin(std::ostream& stream)
{
    stream << ".686p\n"
              ".mmx\n"
              ".model flat\n"
              "option casemap :none\n\n";
}
// Writes the closing "end" directive (tab-indented, newline-terminated)
// of the generated file.
static void output_asm_end(std::ostream& stream)
{
    stream << "\tend\n";
}
static void output_segment_begin(ostream& stream, const IMAGE_SECTION_HEADER& secth)
{
//char sect_name[IMAGE_SIZEOF_SHORT_NAME + 1];
//to_sectname((char*)secth.Name, sect_name);
const char* sect_name = to_sectname((char*)secth.Name);
stream << sect_name << "/tSEGMENT/n";
stream << "/tASSUME FS:FLAT/n";
}
static void output_segment_end(ostream& stream, const IMAGE_SECTION_HEADER& secth)
{
//char sect_name[IMAGE_SIZEOF_SHORT_NAME + 1];
//to_sectname((char*)secth.Name, sect_name);
stream << to_sectname((char*)secth.Name) << "/tENDS/n/n";
}
// Returns the printable name of a COFF symbol record.
//
// sym    - the symbol record to name.
// strtab - start of the string table (callers in this file compute it as the
//          address just past the last symbol record).
//
// The name is taken from the string table at offset N.Name.Long when
// N.Name.Short is zero, otherwise from the inline N.ShortName bytes.
//   - If the name starts with '.' and the symbol's Value is 0, the name of
//     the NEXT record (sym + 1) is returned instead, as long as that record
//     still lies before strtab; otherwise the '.' name is returned untouched.
//   - In every other case the name is sanitized in place by to_symname()
//     (this writes into the loaded object image despite the const
//     parameters) and returned.
//
// NOTE(review): sym + 1 may be an auxiliary record rather than a real
// symbol; the original author flagged this shortcut as a hack.
// NOTE(review): an inline ShortName that uses all of its bytes has no NUL
// terminator, so to_symname() would scan into the following fields.
// Confirm whether such inputs need a terminated copy.
static const char* get_symname(const IMAGE_SYMBOL* sym, const char* strtab)
{
    char* symname;
    if ( sym->N.Name.Short == 0 )
        symname = (char*)(strtab + sym->N.Name.Long);
    else
        symname = (char*)sym->N.ShortName;

    if ( *symname == '.' && sym[0].Value == 0 )
    {
        if ( (char*)(sym + 1) < strtab )
            return get_symname(sym + 1, strtab);
        return symname;
    }
    return to_symname(symname);
}
/*
static const uint32_t convert_asm_hex(const char* s)
{
uint32_t r = 0;
while ( *s != 0 && *s != 'H' )
{
r <<= 4;
if ( *s >= '0' && *s <= '9' )
r += *s - '0';
else
r += tolower(*s) - 'a' + 10;
s++;
}
return r;
}
*/
// Orders two symbol records by their Value field only: sym1 sorts before
// sym2 exactly when sym1.Value is the smaller of the two.
static bool operator < (const IMAGE_SYMBOL& sym1, const IMAGE_SYMBOL& sym2)
{
    return (sym2.Value > sym1.Value);
}
static void sort_syminfolst(const IMAGE_SYMBOL* symtab, list<uint32_t>& syminfolst)
{
// 排序, 按符号声明位置从低到高的地址
list<uint32_t>::iterator i = syminfolst.begin();
while ( i != syminfolst.end() )
{
const IMAGE_SYMBOL& sym1 = symtab[*i];
list<uint32_t>::iterator j = syminfolst.begin();
while ( j != i )
{
const IMAGE_SYMBOL& sym2 = symtab[*j];
if ( sym1.Value < sym2.Value )
{
uint32_t t = *i;
*i = *j;
*j = t;
}
j++;
}
i++;
}
}
static bool output_segment(ostream& stream,
const uint8_t* objbase,
int sectno)
{
if ( objbase == NULL || sectno <= 0 )
return false;
const IMAGE_FILE_HEADER& fh = *(PIMAGE_FILE_HEADER)objbase;
const IMAGE_SECTION_HEADER& secth = *((PIMAGE_SECTION_HEADER)(objbase + sizeof (IMAGE_FILE_HEADER)) + sectno - 1);
const IMAGE_SYMBOL* const symtab = (PIMAGE_SYMBOL)(objbase + fh.PointerToSymbolTable);
const char* const strtab = (char*)(symtab + fh.NumberOfSymbols);
const uint8_t* sectdat = NULL;
if ( secth.PointerToRawData != 0 )
sectdat = objbase + secth.PointerToRawData;
// 每个节的重定位表
const IMAGE_RELOCATION* relb = NULL, *rele = NULL;
if ( secth.PointerToRelocations != 0 )
{
relb = (PIMAGE_RELOCATION)(objbase + secth.PointerToRelocations);
rele = relb + secth.NumberOfRelocations;
}
list<uint32_t> syminfolst; // all symbols info in the segment
for ( unsigned j = 0; j < fh.NumberOfSymbols; j++ )
{
if ( symtab[j].SectionNumber == sectno && '.' != *get_symname(symtab + j, strtab) )
syminfolst.push_back(j);
j += symtab[j].NumberOfAuxSymbols;
}
sort_syminfolst(symtab, syminfolst); // 排序
// 反汇编
ud_t ud_obj;
ud_init(&ud_obj);
ud_set_mode(&ud_obj, 32);
ud_set_syntax(&ud_obj, UD_SYN_MASM);
list<uint32_t> lable; // 存放需要输出的标号的偏移
auto_ptr<char> str_result(new char[FILE_BUF_SIZE]); // 10MB 内存缓冲, 应该足够
char* result = str_result.get();
list<uint32_t>::iterator i = syminfolst.begin();
while ( i != syminfolst.end() )
{
const IMAGE_SYMBOL& sym = symtab[*i];
const char* const symname = get_symname(symtab + *i, strtab);
i++;
// 估测符号大小
size_t symsize = ((i != syminfolst.end()) ? symtab[*i].Value : secth.SizeOfRawData) - sym.Value;
if ( symsize == 0 ) continue;
LOGOUT("symname = %s value = %08X symsize = %08X/n", symname, sym.Value, symsize);
const uint8_t* sym_data = NULL;
if ( sectdat != NULL ) // 是否存在初始化数据
sym_data = sectdat + sym.Value;
else
{
result += sprintf(result, "%s/tdb %d dup (?)/n", symname, symsize);
continue;
}
if (
(sym.Type >> 4) == IMAGE_SYM_DTYPE_FUNCTION ||
(
// 判断是否需要反汇编标号下的内容
sym.StorageClass == IMAGE_SYM_CLASS_LABEL &&
relb < rele &&
sym.Value != relb->VirtualAddress
)
)
{
result += sprintf(result, "%s:/n", symname);
ud_set_pc(&ud_obj, secth.PointerToRawData + sym.Value);
ud_set_input_buffer(&ud_obj, (uint8_t*)sym_data, symsize); // 设置要反汇编的输入数据流
size_t k = 0;
while ( k < symsize )
{
size_t l = ud_disassemble(&ud_obj);
if ( 0 == l )
return false; // 非常严重的错误,无法继续
result += sprintf(result, "*%X ", secth.PointerToRawData + sym.Value + k); // 标号
char* p = ud_insn_asm(&ud_obj);
if ( relb < rele &&
(sym.Value + k) <= relb->VirtualAddress &&
(sym.Value + k + l) > relb->VirtualAddress )
{
// 有些操作是根据反汇编引擎输出结果的特点来处理
// 如果 syn-masm 输出发生变化,可能导致本处理过程错误
// 下面的判断顺序,不要随便改变
bool b_imm = true;
if ( ud_obj.operand[0].type == UD_OP_JIMM && ud_obj.operand[0].size == 32 )
{
// call sym
while ( *p != ' ' )
*result++ = *p++;
result += sprintf(result, " %s",
get_symname(symtab + relb->SymbolTableIndex, strtab));
p += 11;
relb++;
b_imm = false;
}
else
{
int j = 0;
if ( ud_obj.operand[1].type == UD_OP_MEM && (sym.Value + k + l - relb->VirtualAddress == 4) )
{
j = 1;
b_imm = false;
}
if ( ud_obj.operand[j].type == UD_OP_MEM && ud_obj.operand[j].offset == 32 )
{
while ( *p != 0 )
{
if ( p[0] == 'H' && p[1] == ']' && p[-9] == '0' )
{
if ( ud_obj.operand[j + 1].type == UD_NONE ? p[2] == 0 : p[2] != 0 )
break;
}
*result++ = *p++;
}
p += 2;
result -= 9;
result += sprintf(result, "%s]",
get_symname(symtab + relb->SymbolTableIndex, strtab));
relb++;
}
}
while ( *p != 0 )
*result++ = *p++;
if ( b_imm && relb != rele && p[-1] == 'H' && p[-10] == '0' &&
(sym.Value + k + l - relb->VirtualAddress == 4) )
{
// 通过上面的判断,此处应该是立即数的替换
// 立即数放在了指令的最后面
result -= 10;
result += sprintf(result, "offset %s",
get_symname(symtab + relb->SymbolTableIndex, strtab));
relb++;
}
// if ( ud_obj.operand[2].type == UD_OP_??? )
// {
// //do anything
// // 暂时没有想到什么指令, 第 3 个参数需要重定位
// }
}
else if ( ud_obj.operand[0].type == UD_OP_JIMM )
{
// 处理没有符号的相对转移,为其产生一个标号
// 自定义标号格式为:
// 前缀 _$@$@_song_lei_@$@$_,紧跟所在符号域,
// 接着是地址
uint32_t off = secth.PointerToRawData + sym.Value + k + l;
if ( ud_obj.operand[0].size == 32 )
off += ud_obj.operand[0].lval.sdword;
else
off += ud_obj.operand[0].lval.sbyte;
if ( find(lable.begin(), lable.end(), off) == lable.end() )
lable.push_back(off);
while ( *p != ' ' ) // 取出指令助记符
*result++ = *p++;
result += sprintf(result, " @dl_%X", off);
}
else
{
// 其他普通的指令
result += sprintf(result, "%s", ud_insn_asm(&ud_obj));
}
*result++ = '/n';
k += l;
}
}
else /* if ( sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
sym.StorageClass == IMAGE_SYM_CLASS_EXTERNAL ||
sym.StorageClass == IMAGE_SYM_CLASS_LABEL ) */
{
// 数据定义
result += sprintf(result, "%s", symname);
size_t k = 0;
while ( k < symsize )
{
// result += sprintf(result, "*%X ", secth.PointerToRawData + sym.Value + k);
// if ( k == 0 )
// result += sprintf(result, "%s", symname);
if ( relb < rele && (sym.Value + k == relb->VirtualAddress) )
{
result += sprintf(result, "/tdd offset %s/n",
get_symname(symtab + relb->SymbolTableIndex, strtab));
relb++;
k += 4;
}
else
{
result += sprintf(result, "/tdb 0%02XH/n", sym_data[k]);
k++;
}
}
}
}
*result++ = 0;
assert(result - str_result.get() < FILE_BUF_SIZE);
result = str_result.get();
while ( *result != 0 )
{
if ( '*' == *result )
{
char* p = ++result;
while ( *p++ != ' ' )
;
p[-1] = 0; // 替换空格为 0, 把它截成字符串
if ( !lable.empty() )
{
uint32_t a;
sscanf(result, "%X", &a);
list<uint32_t>::iterator j = find(lable.begin(), lable.end(), a); // 查询是否需要标识
if ( j != lable.end() )
{
lable.erase(j);
stream << "@dl_" << result << ":/n";
}
}
stream << "/t";
result = p;
}
char* p = result;
while ( *p++ != '/n' )
;
p[-1] = 0; // 把回车替换成 0, 截成字符串
stream << result << endl;
result = p;
}
// str_result();
return (relb == rele);
}
/*
author: dummyz@126.com
*/
static void output_public_and_extern_symbols(ostream& stream, const uint8_t* objbase)
{
const IMAGE_FILE_HEADER& fh = *(PIMAGE_FILE_HEADER)objbase;
const IMAGE_SYMBOL* const symtab = (PIMAGE_SYMBOL)(objbase + fh.PointerToSymbolTable);
char* strtab = (char*)(symtab + fh.NumberOfSymbols);
list<const char*> public_, extern_;
for ( unsigned t = 0; t < fh.NumberOfSymbols; t++ )
{
const IMAGE_SYMBOL& sym = symtab[t];
t += sym.NumberOfAuxSymbols;
if ( sym.StorageClass == IMAGE_SYM_CLASS_EXTERNAL ||
sym.StorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL)
{
const char* symname = get_symname(symtab + t, strtab);
// 暂时区分存放,结果不一定正确
if ( sym.SectionNumber > 0 )
public_.push_back(symname);
else if ( sym.SectionNumber == IMAGE_SYM_UNDEFINED )
extern_.push_back(symname);
}
}
list<const char*>::iterator i = public_.begin();
while ( i != public_.end() )
{
list<const char*>::iterator j = extern_.begin();
while ( j != extern_.end() )
{
if ( strcmp(*i, *j) == 0 )
{
j = extern_.erase(j);
break;
}
else {
j++;
}
}
if ( j == extern_.end() )
stream << "/tpublic " << *i << endl;
i++;
}
list<const char*>::iterator j = extern_.begin();
while ( j != extern_.end() )
{
stream << "/textrn " << *j << " :near/n";
j++;
}
stream << endl;
}
bool obj2asm(const char* obj_name, const char* asm_name)
{
if ( obj_name == NULL || asm_name == NULL )
{
return false;
}
ifstream obj_file(obj_name, ios::in | ios::binary);
ofstream asm_file(asm_name);
if ( obj_file.bad() || asm_file.bad() )
{
return false;
}
obj_file.seekg(0, ios::end);
const size_t size_obj = obj_file.tellg();
obj_file.seekg(0, ios::beg);
auto_ptr<uint8_t> obj_data(new uint8_t[size_obj]);
const uint8_t* const objbase = obj_data.get();
obj_file.read((char*)objbase, size_obj);
if ( !check_obj(objbase) )
{
return false;
}
output_copyright(asm_file);
output_asm_begin(asm_file);
output_public_and_extern_symbols(asm_file, objbase);
const IMAGE_FILE_HEADER& fh = *(PIMAGE_FILE_HEADER)objbase;
const IMAGE_SECTION_HEADER* const sect_table = (PIMAGE_SECTION_HEADER)(objbase + sizeof (IMAGE_FILE_HEADER));
for ( int i = 0; i < fh.NumberOfSections; i++ )
{
if (
(sect_table[i].Characteristics & IMAGE_SCN_LNK_REMOVE) ||
strnicmp(".debug", (char*)sect_table[i].Name, 6) == 0 ||
strnicmp(".sxdata", (char*)sect_table[i].Name, 7) == 0
)
{
continue;
}
output_segment_begin(asm_file, sect_table[i]);
if ( !output_segment(asm_file, objbase, i + 1) )
return false;
output_segment_end(asm_file, sect_table[i]);
}
output_asm_end(asm_file);
return true;
}
#if 0
// Disabled manual smoke test: converts test.obj to test.asm, prints the
// outcome, and reports failure through the process exit status.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    const bool ok = obj2asm("test.obj", "test.asm");
    fputs(ok ? "OK!\n" : "FAILED!\n", stdout);
    return ok ? 0 : 1;
}
#endif