System Call Interposition: how to implement virtualization
来源:互联网 发布:怪物猎人ol 知乎 编辑:程序博客网 时间:2024/06/05 20:38
System Call Interposition (SCI) support tracks all the system service requests of processes. Each request can be modified or denied.
It is possible to implement tools to trace, monitor, or virtualize processes.
This posting shows three different ways to implement a System Call Interposition service. The simple virtualization problem of hiding the contents of the file /etc/passwd will be implemented by each SCI service, showing the pros and cons of each proposal.
This example can also be used as a proof-of-concept test when proposing other services for SCI.
Contents
- 1 The example
- 2 Purelibc
- 3 ptrace
- 4 kmview.ko (based on utrace)
- 5 Basic Performance Evaluation
The example
When a process tries to open the file "/etc/passwd" the system call must fail returning errno=ENOENT.
Purelibc
#define _GNU_SOURCE#include <stdio.h>#include <string.h>#include <stdarg.h>#include <sys/syscall.h>#include <unistd.h>#include <purelibc.h>#include <errno.h>static sfun _native_syscall;static char buf[128];static long int mysc(long int sysno, ...){ va_list ap; long int a1,a2,a3,a4,a5,a6; va_start (ap, sysno); a1=va_arg(ap,long int); a2=va_arg(ap,long int); a3=va_arg(ap,long int); a4=va_arg(ap,long int); a5=va_arg(ap,long int); a6=va_arg(ap,long int); va_end(ap); if (sysno == __NR_open) { char *path=(char *)a1; if (a1 && strcmp(path,"/etc/passwd")==0) { errno=ENOENT; return -1; } } return _native_syscall(sysno,a1,a2,a3,a4,a5,a6);} void __attribute ((constructor))init_test (void){ _native_syscall=_pure_start(mysc,NULL,PUREFLAG_STDALL);}
Compile this source code (sci_purelibc.c):
gcc -shared -o sci_purelibc.so sci_purelibc.c
preload purelibc and this shared object:
export LD_PRELOAD=libpurelibc.so:/tmp/tests/syscall_interposition/sci_purelibc.so
and now /etc/passwd has disappeared
$ cat /etc/passwd
cat: /etc/passwd: No such file or directory
Requirements: depends on the purelibc library
Pros: very fast.
Cons: unsafe (can be easily circumvented); it works only for dynamically linked executables.
ptrace
#include <sys/ptrace.h>#include <sys/types.h>#include <sys/wait.h>#include <unistd.h>#include <stdio.h>#include <limits.h>#include <errno.h>#include <sys/user.h>#include <asm/ptrace-abi.h>#include <asm/unistd.h>int main(int argc, char *argv[]){ pid_t child; long orig_eax; child = fork(); if(child == 0) { ptrace(PTRACE_TRACEME, 0, NULL, NULL); argv++; execvp(argv[0],argv); } else { int status; int gotpasswd=0; int out=0; while(1) { waitpid(child,&status,0); if(WIFEXITED(status) || WIFSIGNALED(status)) break; orig_eax = ptrace(PTRACE_PEEKUSER, child, 4 * ORIG_EAX, NULL); if (gotpasswd == 0) { if (orig_eax == __NR_open) { if (out==0) { char path[PATH_MAX]; int i; long pathaddr=ptrace(PTRACE_PEEKUSER, child, 4 * EBX, NULL); errno=0; for (i=0; i<PATH_MAX; i++) { if ((i&0x3) == 0) { long chunk=ptrace(PTRACE_PEEKDATA, child, (char *)(pathaddr+i), 0); if (errno != 0) break; * ((long *) (&path[i])) = chunk; } if (path[i] == 0) break; } if (strcmp(path,"/etc/passwd")==0) { ptrace(PTRACE_POKEUSER, child, 4 * ORIG_EAX, __NR_getpid); gotpasswd=1; } } out = 1-out; } } else { ptrace(PTRACE_POKEUSER, child, 4 * EAX, -ENOENT); gotpasswd=out=0; } ptrace(PTRACE_SYSCALL, child, NULL, NULL); } } return 0;}
Compile the source code (sci_ptrace.c)
gcc -o sci_ptrace sci_ptrace.c
Run it:
./sci_ptrace cat /etc/passwd
cat: /etc/passwd: No such file or directory
Requirements: none (the kernel must provide ptrace)
Pros: it works
Cons: slow; many "addresses" are processor architecture dependent; the interface is not clean (some signals cannot be used, e.g. SIGSTOP/SIGCONT, and it overrides the natural semantics of the wait system call).
kmview.ko (based on utrace)
#define _GNU_SOURCE#include <sys/types.h>#include <sys/wait.h>#include <unistd.h>#include <stdio.h>#include <stdlib.h>#include <limits.h>#include <errno.h>#include <fcntl.h>#include <string.h>#include <asm/unistd.h>#include <sys/ioctl.h>#include <kmview.h>void dowait(int signal){ int w; wait(&w);}#ifdef OPT_PATH_HASHstatic int hash(char *s){ int rv=0; while (*s) { rv ^= (rv << 5) + (rv >> 2) + *s; s++; } return rv;}#endifmain(int argc, char *argv[]){ int fd; struct kmview_event event; int flags=0;#ifdef OPT_OPEN_ONLY int bitmap[INT_PER_MAXSYSCALL];#endif#ifdef OPT_PATH_HASH struct ghosthash64 gh;#endif fd=open("/dev/kmview",O_RDONLY); if (fd <0) exit(1);#ifdef OPT_OPEN_ONLY scbitmap_fill(bitmap); scbitmap_clr(bitmap, __NR_open); ioctl(fd, KMVIEW_SYSCALLBITMAP,bitmap);#endif#ifdef OPT_PATH_HASH flags|=KMVIEW_FLAG_PATH_SYSCALL_SKIP; gh.deltalen[0]=strlen("/etc/passwd"); gh.hash[0] = hash("/etc/passwd"); gh.deltalen[1]=GH_TERMINATE; ioctl(fd,KMVIEW_GHOSTMOUNTS,&gh);#endif#ifdef OPT_FDSET flags|=KMVIEW_FLAG_FDSET;#endif ioctl(fd, KMVIEW_SET_FLAGS, flags); signal(SIGCHLD,dowait); if (fork()) { while (1) { read(fd,&event,sizeof(event)); switch (event.tag) { case KMVIEW_EVENT_NEWTHREAD: { struct kmview_ioctl_umpid ump; ump.kmpid=event.x.newthread.kmpid; ump.umpid=event.x.newthread.kmpid; ioctl(fd, KMVIEW_UMPID, &ump); break; } case KMVIEW_EVENT_TERMTHREAD: if (event.x.termthread.remaining == 0) exit (0); break; case KMVIEW_EVENT_SYSCALL_ENTRY: if (event.x.syscall.scno == __NR_open) { char path[PATH_MAX]; struct kmview_ioctl_data data={event.x.syscall.x.umpid, event.x.syscall.args[0],PATH_MAX,path}; ioctl(fd,KMVIEW_READSTRINGDATA, &data); if (strcmp(path,"/etc/passwd") == 0) { struct kmview_event_ioctl_sysreturn outevent; outevent.x.kmpid=event.x.syscall.x.umpid; outevent.retval=-1; outevent.erno = ENOENT; ioctl(fd,KMVIEW_SYSVIRTUALIZED, &outevent); } else ioctl(fd, KMVIEW_SYSRESUME, event.x.syscall.x.umpid); } else ioctl(fd, KMVIEW_SYSRESUME, event.x.syscall.x.umpid); 
break; } } } else { /* traced root process*/ ioctl(fd, KMVIEW_ATTACH); close(fd); argv++; execvp(argv[0],argv); }}
Compile the source code (sci_kmview.c)
gcc -o sci_kmview sci_kmview.c
Run it:
./sci_kmview cat /etc/passwd
cat: /etc/passwd: No such file or directory
The code includes several optimizations:
- OPT_OPEN_ONLY: the kernel module filters only the "open" system calls
- OPT_PATH_HASH: when a system call uses a path, kmview.ko forwards only those whose path matches a hash key
- OPT_FDSET: kmview.ko manages a table of the "virtualized" file descriptors
Optimizations can be added at compile time using a combination of -DOPT_OPEN_ONLY, -DOPT_PATH_HASH and -DOPT_FDSET.
Requirements: the kernel must support utrace and the kmview.ko kernel module must be loaded
Pros: fast, several optimizations can run in kernel space, clean design (events can be read from a device), architecture independent.
Cons: utrace is not a feature of the vanilla Linux kernel
Basic Performance Evaluation
The benchmarking code is the following:
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Benchmark: opens and closes /etc/passwd and /etc/hosts 100000 times each.
 *
 * close() is called unconditionally, even when open() failed, so every
 * iteration issues the same sequence of calls whether or not a file is
 * hidden by the interposition layer under test.
 */
int main(void)
{
    int i;
    int fd;

    for (i = 0; i < 100000; i++) {
        fd = open("/etc/passwd", O_RDONLY);
        close(fd);
        fd = open("/etc/hosts", O_RDONLY);
        close(fd);
    }
    return 0;
}
The execution times are the following:
* kernel (not virtualized): 0.8 sec
* purelibc: 0.48 sec
* ptrace: ~37.5 sec
* kmview.ko (no opt): ~22 sec
* kmview.ko (opt): ~7.1 sec
(purelibc virtualization is even faster than the non-virtualized case because it generates fewer system calls)
Please note that this example has been designed to provide almost the worst case for the virtualizing service. The implementation based on kmview creates a minimal overhead when tested in a more common scenario (e.g. a compilation):
$ time gcc -o test test.c
real 0m0.147s
user 0m0.084s
sys 0m0.044s
$ time ./sci_kmview gcc -o test test.c
real 0m0.146s
user 0m0.088s
sys 0m0.048s
- ystem Call Interposition: how to implement virtualization
- How to implement fiber
- how to implement QAbstractScrollArea
- How to detect virtualization 虚拟机检测
- How to terminate call
- How Win32 & Linux implement System Call
- How to implement "Find Target"
- How to: Implement Interface Events
- Example - how to implement INotifyDataErrorInfo
- How to implement double currency
- How to implement MySQL Sharding
- How to implement segment tree
- What is EPOLL? EPOLL vs Select call? And How to implement UDP server in Linux using EPOLL?
- What is EPOLL? Epoll vs Poll vs Select call ? And How to implement UDP server in Linux using EPOLL?
- How to implement Android Pull-to-Refresh
- How to check if your hardware supports virtualization
- HOW TO IMPLEMENT WEB SERVICES IN INDY
- How To: Implement A Server Plug-in
- JQuery可以编辑的表格
- 类加载器---类加载机制
- TCP状态变迁图及状态说明
- Java反射机制(2)
- 文件系统处理
- ystem Call Interposition: how to implement virtualization
- 原码转补码
- 第六周——多文件组织多个类程序
- STM32片外内存的使用
- OC从文件读到NSString
- 指针函数与函数指针的区别
- hibernate中悲观锁和乐观锁和lazy加载
- TopCoder——HillHiker(爬山问题)
- FAFU-1398 面积 矩形面积并 线段树+扫描线