利用Kprobe探测内核中的变量

来源：互联网发布：淘宝卖家违规扣分编辑：程序博客网时间：2024/05/21 06:43

原创作品，允许转载，转载时请务必以超链接形式标明文章原始出处、作者信息和本声明。否则将追究法律责任。http://alanwu.blog.51cto.com/3652632/1111213

今天遇到一个问题，需要探测内核中buffer cache block的大小。我想到了Kprobe这个神奇的工具，并且很好的探测到了内核中的变量值，非常的方便，在此分享一下。

采用dd等工具写设备的时候，是需要经过块设备层的buffer cache，当请求块大小小于buffer cache的block_size时，Linux的策略是首先需要从磁盘load数据至buffer cache，然后再将新写入的“局部数据”写入buffer cache。这一步骤完成之后，会将整个buffer cache标识成dirty，挂载到设备所属的radix tree上，然后定时唤醒后台writeback线程刷新dirty block至磁盘。今天对linux-3.2和linux-2.6.23的顺序写进行了对比测试，发现请求大小在512至2048之间时，Linux-3.2的性能居然比Linux-2.6.23还差。测试后得到的性能特征似乎与buffer cache的块大小有关系，因此，我采用Kprobe对两个版本的块大小进行了探测验证。

为了探测这个值，首先需要找一个合适的探测点，根据代码分析的结果，我选择在__block_write_begin函数中调用create_empty_buffers函数时的机会点，采用Kprobe插入一段代码，打印buffer cache block_size的值。探测点位置的源代码如下所示：

/*
 * Excerpt of the kernel function __block_write_begin; the parts elided by the
 * article are marked with "。。。". It is shown to locate the probe target:
 * create_empty_buffers is called with blocksize as its second argument.
 */
int __block_write_begin(struct page *page, loff_t pos, unsigned len,  
        get_block_t *get_block)  
{  
    。。。  
 
    /* blocksize is computed from the inode's block-size shift (i_blkbits). */
    blocksize = 1 << inode->i_blkbits;  
    /* Buffers are only created when the page does not have any yet. */
    if (!page_has_buffers(page))  
        create_empty_buffers(page, blocksize, 0);  
    head = page_buffers(page);  
 
    bbits = inode->i_blkbits;  
    /* Convert the page index into a block number using the same shift. */
    block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);  
    。。。  
}

通过上面函数，我们知道blocksize就是buffer cache block块大小，因此，我们可以截获create_empty_buffers函数之后，打印传入的第二个参数就可以得到buffer cache块大小值了。截获create_empty_buffers函数很简单，通过kallsyms_lookup_name函数或者/proc/kallsyms就可以得到截获函数对应的内存地址。关键的问题在于截获这个函数之后，我们如何得到它的第二个参数，这就关系到函数的参数传递问题了。

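在X86_64平台上，按照System V AMD64调用约定，函数的前6个整型/指针参数依次通过RDI，RSI，RDX，RCX，R8，R9这6个寄存器传递，其余参数通过栈传递（RAX，R10，R11并不用于传递普通函数的参数，它们只是和参数寄存器一起被保存在pt_regs的“arguments”区域中）。Kprobe在被探测函数的入口处触发，此时参数寄存器尚未被修改，因此在pre_handler函数中，我们可以得到寄存器组变量，通过寄存器组变量我们可以通过RSI寄存器得到create_empty_buffers函数传入的第二个参数值。对于Linux-2.6.23版本，函数调用过程中寄存器在栈中布局定义如下：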

/*
 * Saved-register frame for x86_64 as quoted by the article for Linux 2.6.23.
 * Fields carry the full register names (rsi, rdi, ...). The article reads the
 * second argument of the probed function from rsi.
 */
struct pt_regs {  
    unsigned long r15;  
    unsigned long r14;  
    unsigned long r13;  
    unsigned long r12;  
    unsigned long rbp;  
    unsigned long rbx;  
/* arguments: non-interrupt / non-tracing syscalls only save up to here */  
    unsigned long r11;  
    unsigned long r10;  
    unsigned long r9;  
    unsigned long r8;  
    unsigned long rax;  
    unsigned long rcx;  
    unsigned long rdx;  
    unsigned long rsi;  
    unsigned long rdi;  
    unsigned long orig_rax;  
/* end of arguments */  
/* cpu exception frame or undefined */  
    unsigned long rip;  
    unsigned long cs;  
    unsigned long eflags;  
    unsigned long rsp;  
    unsigned long ss;  
/* top of stack page */  
};

对于Linux-3.2版本，寄存器的组织结构是相同的，但是名字定义有所差别，新版本的寄存器定义如下：

/*
 * Saved-register frame for x86_64 as quoted by the article for Linux 3.2.
 * Same field order as the older layout, but with shortened names (si, di,
 * ...). The probe handler in this article reads the si field.
 */
struct pt_regs {  
    unsigned long r15;  
    unsigned long r14;  
    unsigned long r13;  
    unsigned long r12;  
    unsigned long bp;  
    unsigned long bx;  
/* arguments: non-interrupt / non-tracing syscalls only save up to here */  
    unsigned long r11;  
    unsigned long r10;  
    unsigned long r9;  
    unsigned long r8;  
    unsigned long ax;  
    unsigned long cx;  
    unsigned long dx;  
    unsigned long si;  
    unsigned long di;  
    unsigned long orig_ax;  
/* end of arguments */  
/* cpu exception frame or undefined */  
    unsigned long ip;  
    unsigned long cs;  
    unsigned long flags;  
    unsigned long sp;  
    unsigned long ss;  
/* top of stack page */  
};

知道如何访问截获函数的输入参数之后，probe程序就可以很容易编写了，我的实验程序如下所示，仅供参考。

/*  
 * kprobe_jiffies.c  
 */  
 
#include <linux/module.h> 
#include <linux/kernel.h> 
#include <linux/errno.h>
#include <linux/string.h> 
#include <linux/init.h> 
#include <linux/kprobes.h> 
#include <linux/kallsyms.h> 
#include "asm/ptrace.h"  
#include "asm/current.h"  
#include "linux/utsname.h"  
 
/* Probe descriptor used by the init/exit routines; only referenced in this file, so it has internal linkage. */
static struct kprobe probe;
 
/*
 * Pre-handler, installed as the kprobe's pre_handler.
 *
 * Logs regs->si, the register from which this article reads the second
 * argument (blocksize) of the probed function on x86_64. The field is
 * named rsi in the older pt_regs layout.
 *
 * @kp:   the kprobe that was hit (unused)
 * @regs: saved register set at the probe point
 *
 * Always returns 0.
 */
static int pre_probe(struct kprobe *kp, struct pt_regs *regs)
{
    /* regs->si is an unsigned long, so it must be formatted with %lu. */
    printk("block_size = %lu.\n", regs->si);
    return 0;
}
/*
 * Post-handler, installed as the kprobe's post_handler.
 * Intentionally does nothing.
 */
static void post_probe(struct kprobe *kp, struct pt_regs *regs, unsigned long flags)
{
}
 
static int __init kprobe_ init(void)  
{  
    probe.pre_handler = pre_probe;  
    probe.post_handler = post_probe;  
 
    probe.addr = (kprobe_opcode_t *) kallsyms_lookup_name("create_empty_buffers");  
    if (probe.addr == NULL) {  
        return 1;  
    }  
 
    register_kprobe(&probe);  
    printk("register probe driver.\n");  
    return 0;  
}  
 
static void __exit kprobe_exit(void)  
{  
    unregister_kprobe(&probe);  
    printk("unregister probe driver.\n");  
    return;  
}  
 
module_init(kprobe_ init);  
module_exit(kprobe_ exit);  
 
MODULE_AUTHOR("xxx");  
MODULE_DESCRIPTION("kernel probe driver");  
MODULE_LICENSE("GPL");

Kprobe的确是个非常不错的调试工具，我们可以不断地发掘它的功能，从而更好地调试、探测内核代码。

本文出自 “存储之道” 博客，请务必保留此出处http://alanwu.blog.51cto.com/3652632/1111213

IBM kprobe

http://www.ibm.com/developerworks/cn/linux/l-cn-systemtap1/

http://www.ibm.com/developerworks/cn/linux/l-kprobes.html

0 0