pwnable之memcpy
来源:互联网 发布:韩智慧知乎 编辑:程序博客网 时间:2024/06/03 10:53
问题描述
Are you tired of hacking?, take some rest here.Just help me out with my small experiment regarding memcpy performance. after that, flag is yours.http://pwnable.kr/bin/memcpy.c
memcpy.c
// compiled with : gcc -o memcpy memcpy.c -m32 -lm#include <stdio.h>#include <string.h>#include <stdlib.h>#include <signal.h>#include <unistd.h>#include <sys/mman.h>#include <math.h>unsigned long long rdtsc(){ asm("rdtsc");}char* slow_memcpy(char* dest, const char* src, size_t len){ int i; for (i=0; i<len; i++) { dest[i] = src[i]; } return dest;}char* fast_memcpy(char* dest, const char* src, size_t len){ size_t i; // 64-byte block fast copy if(len >= 64){ i = len / 64; len &= (64-1); while(i-- > 0){ __asm__ __volatile__ ( "movdqa (%0), %%xmm0\n" "movdqa 16(%0), %%xmm1\n" "movdqa 32(%0), %%xmm2\n" "movdqa 48(%0), %%xmm3\n" "movntps %%xmm0, (%1)\n" "movntps %%xmm1, 16(%1)\n" "movntps %%xmm2, 32(%1)\n" "movntps %%xmm3, 48(%1)\n" ::"r"(src),"r"(dest):"memory"); dest += 64; src += 64; } } // byte-to-byte slow copy if(len) slow_memcpy(dest, src, len); return dest;}int main(void){ setvbuf(stdout, 0, _IONBF, 0); setvbuf(stdin, 0, _IOLBF, 0); printf("Hey, I have a boring assignment for CS class.. :(\n"); printf("The assignment is simple.\n"); printf("-----------------------------------------------------\n"); printf("- What is the best implementation of memcpy? -\n"); printf("- 1. implement your own slow/fast version of memcpy -\n"); printf("- 2. compare them with various size of data -\n"); printf("- 3. 
conclude your experiment and submit report -\n"); printf("-----------------------------------------------------\n"); printf("This time, just help me out with my experiment and get flag\n"); printf("No fancy hacking, I promise :D\n"); unsigned long long t1, t2; int e; char* src; char* dest; unsigned int low, high; unsigned int size; // allocate memory char* cache1 = mmap(0, 0x4000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); char* cache2 = mmap(0, 0x4000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); src = mmap(0, 0x2000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); size_t sizes[10]; int i=0; // setup experiment parameters for(e=4; e<14; e++){ // 2^13 = 8K low = pow(2,e-1); high = pow(2,e); printf("specify the memcpy amount between %d ~ %d : ", low, high); scanf("%d", &size); if( size < low || size > high ){ printf("don't mess with the experiment.\n"); exit(0); } sizes[i++] = size; } sleep(1); printf("ok, lets run the experiment with your configuration\n"); sleep(1); // run experiment for(i=0; i<10; i++){ size = sizes[i]; printf("experiment %d : memcpy with buffer size %d\n", i+1, size); dest = malloc( size ); memcpy(cache1, cache2, 0x4000); // to eliminate cache effect t1 = rdtsc(); slow_memcpy(dest, src, size); // byte-to-byte memcpy t2 = rdtsc(); printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1); memcpy(cache1, cache2, 0x4000); // to eliminate cache effect t1 = rdtsc(); fast_memcpy(dest, src, size); // block-to-block memcpy t2 = rdtsc(); printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1); printf("\n"); } printf("thanks for helping my experiment!\n"); printf("flag : ----- erased in this source code -----\n"); return 0;}
简单分析
slow_memcpy 是逐字节复制;fast_memcpy 在长度不小于64字节时,以64字节为一块,用 movdqa 把数据读入 xmm0~xmm3 寄存器,再用 movntps 不经过cache地写入目标地址,剩余不足64字节的部分(以及总长度小于64字节的情况)交给 slow_memcpy 处理。
编译链接运行
这里在每次为dest申请空间后面加了一句,printf("dest addr :%p\n",dest);
$ gcc -o memcpy memcpy.c -m32 -lm$ ./memcpy Hey, I have a boring assignment for CS class.. :(The assignment is simple.------------------------------------------------------ What is the best implementation of memcpy? -- 1. implement your own slow/fast version of memcpy -- 2. compare them with various size of data -- 3. conclude your experiment and submit report ------------------------------------------------------This time, just help me out with my experiment and get flagNo fancy hacking, I promise :Dspecify the memcpy amount between 8 ~ 16 : 8specify the memcpy amount between 16 ~ 32 : 16specify the memcpy amount between 32 ~ 64 : 32specify the memcpy amount between 64 ~ 128 : 64specify the memcpy amount between 128 ~ 256 : 128specify the memcpy amount between 256 ~ 512 : 256specify the memcpy amount between 512 ~ 1024 : 512specify the memcpy amount between 1024 ~ 2048 : 1024specify the memcpy amount between 2048 ~ 4096 : 2048specify the memcpy amount between 4096 ~ 8192 : 4096ok, lets run the experiment with your configurationexperiment 1 : memcpy with buffer size 8ellapsed CPU cycles for slow_memcpy : 4620dest addr :0x57f46410ellapsed CPU cycles for fast_memcpy : 21792experiment 2 : memcpy with buffer size 16ellapsed CPU cycles for slow_memcpy : 828dest addr :0x57f46420ellapsed CPU cycles for fast_memcpy : 23100experiment 3 : memcpy with buffer size 32ellapsed CPU cycles for slow_memcpy : 768dest addr :0x57f46438ellapsed CPU cycles for fast_memcpy : 12456experiment 4 : memcpy with buffer size 64ellapsed CPU cycles for slow_memcpy : 1932dest addr :0x57f46460ellapsed CPU cycles for fast_memcpy : 14880experiment 5 : memcpy with buffer size 128ellapsed CPU cycles for slow_memcpy : 3192dest addr :0x57f464a8段错误
调试
$ gdb memcpy -qReading symbols from memcpy...(no debugging symbols found)...done.gdb-peda$ set disassembly-flavor intelgdb-peda$ rStarting program: /home/pwd/Desktop/pwdmylife/pwnable/memcpy/memcpy Hey, I have a boring assignment for CS class.. :(The assignment is simple.------------------------------------------------------ What is the best implementation of memcpy? -- 1. implement your own slow/fast version of memcpy -- 2. compare them with various size of data -- 3. conclude your experiment and submit report ------------------------------------------------------This time, just help me out with my experiment and get flagNo fancy hacking, I promise :Dspecify the memcpy amount between 8 ~ 16 : 8specify the memcpy amount between 16 ~ 32 : 16specify the memcpy amount between 32 ~ 64 : 32specify the memcpy amount between 64 ~ 128 : 64specify the memcpy amount between 128 ~ 256 : 128specify the memcpy amount between 256 ~ 512 : 256specify the memcpy amount between 512 ~ 1024 : 512specify the memcpy amount between 1024 ~ 2048 : 1024specify the memcpy amount between 2048 ~ 4096 : 2048specify the memcpy amount between 4096 ~ 8192 : 4096ok, lets run the experiment with your configurationexperiment 1 : memcpy with buffer size 8ellapsed CPU cycles for slow_memcpy : 5376dest addr :0x56559410ellapsed CPU cycles for fast_memcpy : 50632experiment 2 : memcpy with buffer size 16ellapsed CPU cycles for slow_memcpy : 544dest addr :0x56559420ellapsed CPU cycles for fast_memcpy : 20176experiment 3 : memcpy with buffer size 32ellapsed CPU cycles for slow_memcpy : 672dest addr :0x56559438ellapsed CPU cycles for fast_memcpy : 14136experiment 4 : memcpy with buffer size 64ellapsed CPU cycles for slow_memcpy : 1184dest addr :0x56559460ellapsed CPU cycles for fast_memcpy : 13944experiment 5 : memcpy with buffer size 128ellapsed CPU cycles for slow_memcpy : 2040dest addr :0x565594a8Program received signal SIGSEGV, Segmentation 
fault.[----------------------------------registers-----------------------------------]EAX: 0xf7fc8000 --> 0x0 EBX: 0x56558000 --> 0x2ee8 ECX: 0xffff9790 ("dest addr :0x565594a8\nr slow_memcpy : 2040\n\n2 : ")EDX: 0x565594a8 --> 0x0 ESI: 0x1 EDI: 0xf7f55000 --> 0x1b2db0 EBP: 0xffffbca8 --> 0xffffbd38 --> 0x0 ESP: 0xffffbc98 --> 0xffffbcb4 --> 0xf7fc8000 --> 0x0 EIP: 0x5655588f (<fast_memcpy+62>: movntps XMMWORD PTR [edx],xmm0)EFLAGS: 0x10202 (carry parity adjust zero sign trap INTERRUPT direction overflow)[-------------------------------------code-------------------------------------] 0x56555880 <fast_memcpy+47>: movdqa xmm1,XMMWORD PTR [eax+0x10] 0x56555885 <fast_memcpy+52>: movdqa xmm2,XMMWORD PTR [eax+0x20] 0x5655588a <fast_memcpy+57>: movdqa xmm3,XMMWORD PTR [eax+0x30]=> 0x5655588f <fast_memcpy+62>: movntps XMMWORD PTR [edx],xmm0 0x56555892 <fast_memcpy+65>: movntps XMMWORD PTR [edx+0x10],xmm1 0x56555896 <fast_memcpy+69>: movntps XMMWORD PTR [edx+0x20],xmm2 0x5655589a <fast_memcpy+73>: movntps XMMWORD PTR [edx+0x30],xmm3 0x5655589e <fast_memcpy+77>: add DWORD PTR [ebp+0x8],0x40[------------------------------------stack-------------------------------------]0000| 0xffffbc98 --> 0xffffbcb4 --> 0xf7fc8000 --> 0x0 0004| 0xffffbc9c --> 0xf7e13a25 (<__GI___libc_malloc+197>: test eax,eax)0008| 0xffffbca0 --> 0x56558000 --> 0x2ee8 0012| 0xffffbca4 --> 0x1 0016| 0xffffbca8 --> 0xffffbd38 --> 0x0 0020| 0xffffbcac ("O\\UV\250\224UV")0024| 0xffffbcb0 --> 0x565594a8 --> 0x0 0028| 0xffffbcb4 --> 0xf7fc8000 --> 0x0 [------------------------------------------------------------------------------]Legend: code, data, rodata, valueStopped reason: SIGSEGV0x5655588f in fast_memcpy ()
分析
网上找到有关SSE指令movntps的资料
movntps m128,XMM m128 <== XMM 直接把XMM中的值送入m128,不经过cache,必须对齐16字节.
这里edx存放了dest的首地址(崩溃时edx为0x565594a8,低4位是0x8,没有对齐),16字节对齐要求该地址的低4位(二进制)均为0;而dest的空间是用malloc从堆上申请的,它的地址由堆块的布局决定。
32位,堆的结构
| 4 bytes (prev_size) | 4 bytes (size,低3位为标志位 A|M|P) | data ... |
solve
为每轮实验选择合适的申请大小,保证malloc返回的dest地址的低4位(二进制)为0,即16字节对齐
#!/usr/bin/env python#coding:utf-8#made by pwdfrom pwn import *import sysimport mathfor i in xrange(4,15): size = math.pow(2,i) print "###########",i for j in xrange(int(math.pow(2,i))): tmp = size + j tmp = 8 * ((tmp + 4) / 8 + 1) tmp += 8 if tmp % 16 == 0: print size + j break
阅读全文
0 0
- pwnable之memcpy
- pwnable之coin1
- pwnable之blackjack
- pwnable.kr 之fd
- pwnable.kr之bof
- pwnable.kr之flag
- pwnable.kr之passcode
- pwnable.kr之random
- pwnable.kr之shellshock
- pwnable.kr之mistake
- pwnable.kr之input
- pwnable.kr之lotto
- pwnable.kr之cmd1
- pwnable.kr之blackjack
- pwnable.kr之uaf
- pwnable之input
- pwnable.kr之cmd2
- pwnable.kr之uaf
- 数学公式
- 《算法-algorithm》第一章实验二 乱序检查
- Servlet学习第三天
- vector和list的使用及区别
- 数据增强在卷积神经网络中的应用
- pwnable之memcpy
- Linux之Apache下日志文件error.log过大的解决方法
- Animation 过渡效果
- 解决虚拟机下的ubuntu不能上网的问题
- CONFIG_NO_HZ (aka dynamic tick functionality)
- Zookeeper 配置
- 判素数(Prime number)
- PHP使用mcrypt进行AES加密后的数据NodeJS等无法解密的问题修复方案
- Hibernate_优化与事务