《coredump问题原理探究》Linux x86版4.5节函数的逆向之coredump例子

来源：互联网发布：loadrunner监控数据库编辑：程序博客网时间：2024/04/29 20:59

在了解了上面的规律后，现在开始来解决本章一开头的问题：

(gdb) bt
#0  0x4365b569 in vfprintf () from /lib/libc.so.6
#1  0x436629ff in printf () from /lib/libc.so.6
#2  0x080485b9 in main ()

看一下main函数的汇编：

(gdb) disassemble mainDump of assembler code for function main:   0x08048500 <+0>:     push   %ebp   0x08048501 <+1>:     mov    %esp,%ebp   0x08048503 <+3>:     and    $0xfffffff0,%esp   0x08048506 <+6>:     sub    $0x20,%esp   0x08048509 <+9>:     movl   $0x0,0x1c(%esp)   0x08048511 <+17>:    jmp    0x80485bf <main+191>   0x08048516 <+22>:    mov    0x1c(%esp),%eax   0x0804851a <+26>:    lea    0x0(,%eax,4),%edx   0x08048521 <+33>:    mov    0xc(%ebp),%eax   0x08048524 <+36>:    add    %edx,%eax   0x08048526 <+38>:    mov    (%eax),%eax   0x08048528 <+40>:    mov    %eax,(%esp)   0x0804852b <+43>:    call   0x80483d0 <strlen@plt>   0x08048530 <+48>:    mov    %eax,0x18(%esp)   0x08048534 <+52>:    mov    0x18(%esp),%eax   0x08048538 <+56>:    cmp    $0x1,%eax   0x0804853b <+59>:    je     0x8048570 <main+112>   0x0804853d <+61>:    cmp    $0x2,%eax   0x08048540 <+64>:    je     0x804858f <main+143>   0x08048542 <+66>:    test   %eax,%eax   0x08048544 <+68>:    jne    0x80485a5 <main+165>   0x08048546 <+70>:    mov    0x1c(%esp),%eax   0x0804854a <+74>:    lea    0x0(,%eax,4),%edx   0x08048551 <+81>:    mov    0xc(%ebp),%eax   0x08048554 <+84>:    add    %edx,%eax   0x08048556 <+86>:    mov    (%eax),%eax   0x08048558 <+88>:    movzbl (%eax),%eax   0x0804855b <+91>:    movsbl %al,%eax   0x0804855e <+94>:    mov    %eax,0x4(%esp)   0x08048562 <+98>:    movl   $0x8048674,(%esp)   0x08048569 <+105>:   call   0x80483e0 <printf@plt>   0x0804856e <+110>:   jmp    0x80485ba <main+186>   0x08048570 <+112>:   mov    0x1c(%esp),%eax   0x08048574 <+116>:   add    $0x1,%eax   0x08048577 <+119>:   lea    0x0(,%eax,4),%edx   0x0804857e <+126>:   mov    0xc(%ebp),%eax   0x08048581 <+129>:   add    %edx,%eax   0x08048583 <+131>:   mov    (%eax),%eax   0x08048585 <+133>:   mov    %eax,(%esp)   0x08048588 <+136>:   call   0x80483f0 <puts@plt>   0x0804858d <+141>:   jmp    0x80485ba <main+186>   0x0804858f <+143>:   mov    0x1c(%esp),%eax   0x08048593 <+147>:   mov    %eax,0x4(%esp) 
  0x08048597 <+151>:   movl   $0x8048678,(%esp)   0x0804859e <+158>:   call   0x80483e0 <printf@plt>   0x080485a3 <+163>:   jmp    0x80485ba <main+186>   0x080485a5 <+165>:   mov    0x1c(%esp),%eax   0x080485a9 <+169>:   mov    %eax,0x4(%esp)   0x080485ad <+173>:   movl   $0x804867c,(%esp)   0x080485b4 <+180>:   call   0x80483e0 <printf@plt>   0x080485b9 <+185>:   nop   0x080485ba <+186>:   addl   $0x1,0x1c(%esp)   0x080485bf <+191>:   mov    0x1c(%esp),%eax   0x080485c3 <+195>:   cmp    0x8(%ebp),%eax   0x080485c6 <+198>:   setl   %al   0x080485c9 <+201>:   test   %al,%al   0x080485cb <+203>:   jne    0x8048516 <main+22>   0x080485d1 <+209>:   mov    $0x0,%eax   0x080485d6 <+214>:   leave     0x080485d7 <+215>:   ret    End of assembler dump.

由

   0x080485cb <+203>:   jne    0x8048516 <main+22>

可知，0x8048516到0x080485cb构成一个循环。

而0x080485cb的判断条件：

   0x080485bf <+191>:   mov    0x1c(%esp),%eax   0x080485c3 <+195>:   cmp    0x8(%ebp),%eax   0x080485c6 <+198>:   setl   %al   0x080485c9 <+201>:   test   %al,%al

里，提到ebp+8，由于main函数原型的第一个参数是argc（这在第三章“函数参数”里有提及），所以，上面的语句是判断esp+0x1c的值是否小于argc（setl在“小于”时把al置为1），如果是，就继续循环，否则跳出循环。假定esp+0x1c这个变量命名为cnt。

又由

   0x080485ba <+186>:   addl   $0x1,0x1c(%esp)

可翻译成 cnt++，可知，

cnt一开初应该是小于argc的。每次递增都要判断一次。

又由

   0x08048509 <+9>:     movl   $0x0,0x1c(%esp)   0x08048511 <+17>:    jmp    0x80485bf <main+191>

可知，cnt的初始值为0，且一初始化之后就跳转到0x080485bf和argc比较。

而又由

   0x080485d1 <+209>:   mov    $0x0,%eax   0x080485d6 <+214>:   leave     0x080485d7 <+215>:   ret

结合第三章“返回值”那一节可知，main函数无论什么情况都返回0。

所以，整个main函数可以翻译成这样：

int main( int argc, char* argv[] ){int cnt = 0;while ( cnt < argc ){   0x08048516 <+22>:    mov    0x1c(%esp),%eax   0x0804851a <+26>:    lea    0x0(,%eax,4),%edx   0x08048521 <+33>:    mov    0xc(%ebp),%eax   0x08048524 <+36>:    add    %edx,%eax   0x08048526 <+38>:    mov    (%eax),%eax   0x08048528 <+40>:    mov    %eax,(%esp)   0x0804852b <+43>:    call   0x80483d0 <strlen@plt>   0x08048530 <+48>:    mov    %eax,0x18(%esp)   0x08048534 <+52>:    mov    0x18(%esp),%eax   0x08048538 <+56>:    cmp    $0x1,%eax   0x0804853b <+59>:    je     0x8048570 <main+112>   0x0804853d <+61>:    cmp    $0x2,%eax   0x08048540 <+64>:    je     0x804858f <main+143>   0x08048542 <+66>:    test   %eax,%eax   0x08048544 <+68>:    jne    0x80485a5 <main+165>   0x08048546 <+70>:    mov    0x1c(%esp),%eax   0x0804854a <+74>:    lea    0x0(,%eax,4),%edx   0x08048551 <+81>:    mov    0xc(%ebp),%eax   0x08048554 <+84>:    add    %edx,%eax   0x08048556 <+86>:    mov    (%eax),%eax   0x08048558 <+88>:    movzbl (%eax),%eax   0x0804855b <+91>:    movsbl %al,%eax   0x0804855e <+94>:    mov    %eax,0x4(%esp)   0x08048562 <+98>:    movl   $0x8048674,(%esp)   0x08048569 <+105>:   call   0x80483e0 <printf@plt>   0x0804856e <+110>:   jmp    0x80485ba <main+186>   0x08048570 <+112>:   mov    0x1c(%esp),%eax   0x08048574 <+116>:   add    $0x1,%eax   0x08048577 <+119>:   lea    0x0(,%eax,4),%edx   0x0804857e <+126>:   mov    0xc(%ebp),%eax   0x08048581 <+129>:   add    %edx,%eax   0x08048583 <+131>:   mov    (%eax),%eax   0x08048585 <+133>:   mov    %eax,(%esp)   0x08048588 <+136>:   call   0x80483f0 <puts@plt>   0x0804858d <+141>:   jmp    0x80485ba <main+186>   0x0804858f <+143>:   mov    0x1c(%esp),%eax   0x08048593 <+147>:   mov    %eax,0x4(%esp)   0x08048597 <+151>:   movl   $0x8048678,(%esp)   0x0804859e <+158>:   call   0x80483e0 <printf@plt>   0x080485a3 <+163>:   jmp    0x80485ba <main+186>   0x080485a5 <+165>:   mov    0x1c(%esp),%eax   0x080485a9 <+169>:   mov    %eax,0x4(%esp)   
0x080485ad <+173>:   movl   $0x804867c,(%esp)   0x080485b4 <+180>:   call   0x80483e0 <printf@plt>   0x080485b9 <+185>:   nop   cnt++;}return 0;}

分析一下上面汇编块的跳转语句

由

   0x08048538 <+56>:    cmp    $0x1,%eax   0x0804853b <+59>:    je     0x8048570 <main+112>

可知，

   0x08048570 <+112>:   mov    0x1c(%esp),%eax   0x08048574 <+116>:   add    $0x1,%eax   0x08048577 <+119>:   lea    0x0(,%eax,4),%edx   0x0804857e <+126>:   mov    0xc(%ebp),%eax   0x08048581 <+129>:   add    %edx,%eax   0x08048583 <+131>:   mov    (%eax),%eax   0x08048585 <+133>:   mov    %eax,(%esp)   0x08048588 <+136>:   call   0x80483f0 <puts@plt>   0x0804858d <+141>:   jmp    0x80485ba <main+186>

是在eax等于1的情况下的代码块。

同样分析

   0x0804853d <+61>:    cmp    $0x2,%eax   0x08048540 <+64>:    je     0x804858f <main+143>   0x08048542 <+66>:    test   %eax,%eax   0x08048544 <+68>:    jne    0x80485a5 <main+165>

可知

   0x0804858f <+143>:   mov    0x1c(%esp),%eax   0x08048593 <+147>:   mov    %eax,0x4(%esp)   0x08048597 <+151>:   movl   $0x8048678,(%esp)   0x0804859e <+158>:   call   0x80483e0 <printf@plt>   0x080485a3 <+163>:   jmp    0x80485ba <main+186>

是在eax为2的情况下的代码块。

   0x08048546 <+70>:    mov    0x1c(%esp),%eax   0x0804854a <+74>:    lea    0x0(,%eax,4),%edx   0x08048551 <+81>:    mov    0xc(%ebp),%eax   0x08048554 <+84>:    add    %edx,%eax   0x08048556 <+86>:    mov    (%eax),%eax   0x08048558 <+88>:    movzbl (%eax),%eax   0x0804855b <+91>:    movsbl %al,%eax   0x0804855e <+94>:    mov    %eax,0x4(%esp)   0x08048562 <+98>:    movl   $0x8048674,(%esp)   0x08048569 <+105>:   call   0x80483e0 <printf@plt>   0x0804856e <+110>:   jmp    0x80485ba <main+186>

是在eax为0的情况下的代码块。

   0x080485a5 <+165>:   mov    0x1c(%esp),%eax   0x080485a9 <+169>:   mov    %eax,0x4(%esp)   0x080485ad <+173>:   movl   $0x804867c,(%esp)   0x080485b4 <+180>:   call   0x80483e0 <printf@plt>   0x080485b9 <+185>:   nop

是在eax不为0，1，2这三种情况下的代码块。由于这几个判断都是特定的整数，所以最好用switch结构来还原。

而对于

   0x0804856e <+110>:   jmp    0x80485ba <main+186>

这条指令所跳转的地方，刚好是

   0x080485ba <+186>:   addl   $0x1,0x1c(%esp)

即cnt++;

而eax的值则是由

   0x0804852b <+43>:    call   0x80483d0 <strlen@plt>   0x08048530 <+48>:    mov    %eax,0x18(%esp)   0x08048534 <+52>:    mov    0x18(%esp),%eax

得来的，根据第三章“返回值”那一节，可以知道eax应该是strlen函数的返回值，命名为len。

而在

   0x08048516 <+22>:    mov    0x1c(%esp),%eax   0x0804851a <+26>:    lea    0x0(,%eax,4),%edx   0x08048521 <+33>:    mov    0xc(%ebp),%eax   0x08048524 <+36>:    add    %edx,%eax   0x08048526 <+38>:    mov    (%eax),%eax   0x08048528 <+40>:    mov    %eax,(%esp)

由于esp+0x1c已经命名为cnt了，ebp+0xC为main函数第二个参数argv。那么这一段汇编的意思是取argv[cnt]的值，并把它压入栈里。

所以main函数又会变成这样：

int main( int argc, char* argv[] ){int cnt = 0;while ( cnt < argc ){size_t len = strlen( argv[cnt] );switch ( len ){case 0:        {   0x08048546 <+70>:    mov    0x1c(%esp),%eax            0x0804854a <+74>:    lea    0x0(,%eax,4),%edx  0x08048551 <+81>:    mov    0xc(%ebp),%eax  0x08048554 <+84>:    add    %edx,%eax  0x08048556 <+86>:    mov    (%eax),%eax  0x08048558 <+88>:    movzbl (%eax),%eax  0x0804855b <+91>:    movsbl %al,%eax  0x0804855e <+94>:    mov    %eax,0x4(%esp)  0x08048562 <+98>:    movl   $0x8048674,(%esp)  0x08048569 <+105>:   call   0x80483e0 <printf@plt>  break;}case 1:{   0x08048570 <+112>:   mov    0x1c(%esp),%eax   0x08048574 <+116>:   add    $0x1,%eax   0x08048577 <+119>:   lea    0x0(,%eax,4),%edx   0x0804857e <+126>:   mov    0xc(%ebp),%eax   0x08048581 <+129>:   add    %edx,%eax   0x08048583 <+131>:   mov    (%eax),%eax   0x08048585 <+133>:   mov    %eax,(%esp)   0x08048588 <+136>:   call   0x80483f0 <puts@plt>   break;}case 2:{   0x0804858f <+143>:   mov    0x1c(%esp),%eax   0x08048593 <+147>:   mov    %eax,0x4(%esp)   0x08048597 <+151>:   movl   $0x8048678,(%esp)   0x0804859e <+158>:   call   0x80483e0 <printf@plt>break;  }  default:  { 0x080485a5 <+165>:   mov    0x1c(%esp),%eax   0x080485a9 <+169>:   mov    %eax,0x4(%esp)   0x080485ad <+173>:   movl   $0x804867c,(%esp)   0x080485b4 <+180>:   call   0x80483e0 <printf@plt>   0x080485b9 <+185>:   nop   break;}   }cnt++;}return 0;}

看一下case 0情况的汇编：

  0x08048546 <+70>:    mov    0x1c(%esp),%eax  0x0804854a <+74>:    lea    0x0(,%eax,4),%edx  0x08048551 <+81>:    mov    0xc(%ebp),%eax  0x08048554 <+84>:    add    %edx,%eax  0x08048556 <+86>:    mov    (%eax),%eax  0x08048558 <+88>:    movzbl (%eax),%eax  0x0804855b <+91>:    movsbl %al,%eax  0x0804855e <+94>:    mov    %eax,0x4(%esp)  0x08048562 <+98>:    movl   $0x8048674,(%esp)  0x08048569 <+105>:   call   0x80483e0 <printf@plt>

由于printf的第一个参数是格式字符串，那么看一下0x8048674存放着什么内容：

(gdb) x /s 0x8048674
0x8048674 <__dso_handle+4>:      "%c\n"

而

  0x08048546 <+70>:    mov    0x1c(%esp),%eax  0x0804854a <+74>:    lea    0x0(,%eax,4),%edx  0x08048551 <+81>:    mov    0xc(%ebp),%eax  0x08048554 <+84>:    add    %edx,%eax

又在分析strlen时已经知道是指向argv[cnt]的地址，所以，

  0x08048556 <+86>:    mov    (%eax),%eax  0x08048558 <+88>:    movzbl (%eax),%eax  0x0804855b <+91>:    movsbl %al,%eax

就是取argv[cnt][0]的值。

那么，case 0的汇编可翻译成

printf( “%c\n”, argv[cnt][0] );

也就是说，main函数可以变成这样：

int main( int argc, char* argv[] ){int cnt = 0;while ( cnt < argc ){size_t len = strlen( argv[cnt] );switch ( len ){case 0:        {printf( “%c\n”, argv[cnt][0] );  break;}case 1:{   0x08048570 <+112>:   mov    0x1c(%esp),%eax   0x08048574 <+116>:   add    $0x1,%eax   0x08048577 <+119>:   lea    0x0(,%eax,4),%edx   0x0804857e <+126>:   mov    0xc(%ebp),%eax   0x08048581 <+129>:   add    %edx,%eax   0x08048583 <+131>:   mov    (%eax),%eax   0x08048585 <+133>:   mov    %eax,(%esp)   0x08048588 <+136>:   call   0x80483f0 <puts@plt>   break;}case 2:{   0x0804858f <+143>:   mov    0x1c(%esp),%eax   0x08048593 <+147>:   mov    %eax,0x4(%esp)   0x08048597 <+151>:   movl   $0x8048678,(%esp)   0x0804859e <+158>:   call   0x80483e0 <printf@plt>break;  }  default:  {   0x080485a5 <+165>:   mov    0x1c(%esp),%eax   0x080485a9 <+169>:   mov    %eax,0x4(%esp)   0x080485ad <+173>:   movl   $0x804867c,(%esp)   0x080485b4 <+180>:   call   0x80483e0 <printf@plt>   0x080485b9 <+185>:   nop   break;}   }cnt++;}return 0;}

按照case 0的情况可以分析出其它几个如下：

case 1:{puts( argv[cnt+1] );   break;}case 2:{printf( “%d\n”, cnt );break;  }             default:             {                printf( “%s\n”, cnt );                break;             }

即整个main函数如下：

/*
 * main() as reconstructed from the disassembly of the crashing binary.
 *
 * Iterates cnt over [0, argc) and dispatches on strlen(argv[cnt]):
 *   0       -> print the first character of argv[cnt]
 *   1       -> print the NEXT argument, argv[cnt+1]
 *   2       -> print cnt as a decimal integer
 *   default -> the faulty call identified by the core dump analysis
 *
 * The indices follow the disassembly: only the case-1 block contains
 * `add $0x1,%eax` before scaling the index, so only that case uses cnt+1.
 * Always returns 0.
 */
int main( int argc, char* argv[] )
{
    int cnt = 0;
    while ( cnt < argc )
    {
        size_t len = strlen( argv[cnt] );
        switch ( len )
        {
            case 0:
            {
                printf( "%c\n", argv[cnt][0] );
                break;
            }
            case 1:
            {
                puts( argv[cnt+1] );
                break;
            }
            case 2:
            {
                printf( "%d\n", cnt );
                break;
            }
            default:
            {
                /*
                 * Deliberately left as recovered: cnt is an int but "%s"
                 * expects a char*, which is what makes vfprintf crash in
                 * the backtrace being analyzed.
                 */
                printf( "%s\n", cnt );
                break;
            }
        }
        cnt++;
    }
    return 0;
}

由于栈帧#2（main）里的返回地址0x080485b9位于下面这一段中（也就是说，崩溃发生在它前一条call printf指令的调用过程里）

  default:  { 0x080485a5 <+165>:   mov    0x1c(%esp),%eax   0x080485a9 <+169>:   mov    %eax,0x4(%esp)   0x080485ad <+173>:   movl   $0x804867c,(%esp)   0x080485b4 <+180>:   call   0x80483e0 <printf@plt>   0x080485b9 <+185>:   nop   break;}

可知，是由

             default:             {                printf( “%s\n”, cnt );                break;             }

导致崩溃的。

对比一下源代码，可见非常吻合。

 /*
  * Original source of the program whose core dump is analyzed above.
  *
  * For every command-line argument (including argv[0]) it prints something
  * that depends on the argument's length. The default branch contains the
  * defect the analysis tracks down; it is kept here unchanged on purpose.
  */
 #include <stdio.h>
 #include <string.h>

 int main(int argc, char* argv[] )
 {
     for ( int i = 0; i < argc; i++ )
     {
         int len = strlen( argv[i] );
         switch ( len )
         {
             case 0:
                 /* Empty argument: argv[i][0] is the terminating '\0'. */
                 printf( "%c\n", argv[i][0] );
                 break;
             case 1:
                 /*
                  * Prints the NEXT argument, not the current one. In the
                  * disassembly this call appears as puts(argv[i+1]).
                  * NOTE(review): on the last iteration i+1 == argc, so this
                  * passes argv[argc] (a null pointer per the C standard).
                  */
                 printf( "%s\n", argv[i+1] );
                 break;
             case 2:
                 printf( "%d\n", i );
                 break;
             default:
                 /*
                  * Crash site: i is an int, but "%s" makes printf treat it
                  * as a char* and dereference it. This is the call whose
                  * return address (0x080485b9) shows up in frame #2.
                  */
                 printf( "%s\n", i );
                 break;
         }
     }

     return 0;
 }