Linux环境下libpcap库源代码分析

来源:互联网 发布:java 的this关键字 编辑:程序博客网 时间:2024/06/05 23:45
linux环境下libpcap 源代码分析韩大卫@吉林师范大学libpcap 源代码官方下载地址:git clone https://github.com/the-tcpdump-group/libpcap.gittcpdumpm源代码官方下载地址:git clone git://bpf.tcpdump.org/tcpdumptcpdump.c使用libpcap里的pcap_open_livepcap_loop 完成两个最关键的动作:获取捕获报文的接口,和捕获报文并将报文交给callback(关于tcpdump源代码的构架,请参考作者的tcpdump源代码分析) 现结合libpcap源代码分析pcap_open_livepcap_loop的实现机制,并进入linux内核,展示linux内核对这两个API的响应动作。tcpdump.cpcap_open_live的使用是:pd = pcap_open_live(device, snaplen, !pflag, 1000, ebuf); pcap_open_live定义如下:pcap_t *pcap_open_live(const char *source, int snaplen, int promisc, int to_ms, char *errbuf)source 为指定的网络接口。snaplen 为最大报文长度。Promisc 是否将设备设置为混杂模式。to_ms 超时时间。errbuf 为错误信息描述字符。返回值为cap_t类型的指针,pcap_t 定义是:typedef struct pcap pcap_t;struct pcap {/*typedef int (*read_op_t)(pcap_t *, int cnt, pcap_handler, u_char *);read_op为从网络接口读取报文的函数指针,待其得到赋值后,调用实现函数*/    read_op_t read_op; //从文件里读取报文的函数指针    int (*next_packet_op)(pcap_t *, struct pcap_pkthdr *, u_char **);//文件描述符,socket    int fd;    int selectable_fd;       int bufsize;    //read缓冲区大小    u_char *buffer; //read缓冲区指针    u_char *bp;    int cc;...    int snapshot;    int linktype;       /* Network linktype */    int linktype_ext;          int tzoff;      /* timezone offset */    int offset;     /* offset for proper alignment */    int activated;      /* true if the capture is really started */    int oldstyle;       /* if we're opening with pcap_open_live() */    struct pcap_opt opt;     u_char *pkt;...   //激活函数,激活函数在得到调用后,会建立起与底层IPCsocket    activate_op_t activate_op;...};pcap_t *pcap_open_live(const char *source, int snaplen, int promisc, int to_ms, char *errbuf){       pcap_t *p;    int status;
   //创建捕获报文的接口句柄
    p = pcap_create(source, errbuf);    if (p == NULL)        return (NULL);    //设置最大报文长度    status = pcap_set_snaplen(p, snaplen);    if (status < 0)        goto fail;//将设备设为混杂模式    status = pcap_set_promisc(p, promisc);    if (status < 0)        goto fail;//设置超时时间    status = pcap_set_timeout(p, to_ms);    if (status < 0)        goto fail;    p->oldstyle = 1;//pcap_avtivate调用pcap_tactivate_op, 建立起与底层IPC通道    status = pcap_activate(p);    if (status < 0)        goto fail;    return (p);...}pcap_t *pcap_create(const char *source, char *errbuf){       size_t i;    int is_theirs;    pcap_t *p;    if (source == NULL)        source = "any";//capture_source_types数组里寻找是否有特定API集合的接口对应source    for (i = 0; capture_source_types[i].create_op != NULL; i++) {        is_theirs = 0;        p = capture_source_types[i].create_op(source, errbuf, &is_theirs);        if (is_theirs) {                return (p);        }    }    //如果没有, 那么就将source作为普通网络接口    return (pcap_create_interface(source, errbuf));}pcap_create_interface() 函数在libpcap下有多个实现,可由编译宏来指定特定的pcap_create_interface来初始化read_op等函数指针。linux环境里默认是libpcap/pcap-linux.c中的 pcap_create_interface():pcap_t *pcap_create_interface(const char *device, char *ebuf){      pcap_t *handle;    /*可将 pcap_create_common看做pcap_t结构的构造函数,初始化一个pcap_t*/    handle = pcap_create_common(device, ebuf, sizeof (struct pcap_linux));    if (handle == NULL)        return NULL;    //pcap_t 的激活函数指针填充具体实现函数    handle->activate_op = pcap_activate_linux;    handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;       return handle;}完成后回到pcap_open_live,设置snaplen,promisc,to_ms后,调用status = pcap_activate(p),该函数执行status = p->activate_op(p) 进而调用 pcap_activate_linux(), 完成read_op等重要函数指针的具体赋值。static int pcap_activate_linux(pcap_t *handle){       struct pcap_linux *handlep = handle->priv;    const char  *device;    int     status = 0;        device = handle->opt.source;        handle->inject_op = pcap_inject_linux;    handle->setfilter_op = pcap_setfilter_linux;    
handle->setdirection_op = pcap_setdirection_linux;    handle->set_datalink_op = pcap_set_datalink_linux;    handle->getnonblock_op = pcap_getnonblock_fd;    handle->setnonblock_op = pcap_setnonblock_fd;    handle->cleanup_op = pcap_cleanup_linux;//最重要的函数指针read_op    handle->read_op = pcap_read_linux;    handle->stats_op = pcap_stats_linux;    if (strcmp(device, "any") == 0) {        if (handle->opt.promisc) {            handle->opt.promisc = 0;            /* Just a warning. */            snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,                "Promiscuous mode not supported on the \"any\" device");            status = PCAP_WARNING_PROMISC_NOTSUP;        }    }      handlep->device = strdup(device);    if (handlep->device == NULL) {        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "strdup: %s",             pcap_strerror(errno) );        return PCAP_ERROR;    }        handlep->timeout = handle->opt.timeout;    if (handle->opt.promisc)        handlep->proc_dropped = linux_if_drops(handlep->device);    //先使用activete_new()    status = activate_new(handle);    if (status < 0) {        goto fail;    }    //根据错误值具体处理    if (status == 1) {        switch (activate_mmap(handle, &status)) {        case 1:            return status;        case 0:           break;            case -1:            goto fail;        }    }//如果status0, 再尝试使用activete_old()函数    else if (status == 0) {        /* Non-fatal error; try old way */        if ((status = activate_old(handle)) != 1) {            goto fail;        }    }    status = 0;    if (handle->opt.buffer_size != 0) {        //设置socket的缓冲区和缓冲区长度        if (setsockopt(handle->fd, SOL_SOCKET, SO_RCVBUF,            &handle->opt.buffer_size,            sizeof(handle->opt.buffer_size)) == -1) {            snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,                 "SO_RCVBUF: %s", pcap_strerror(errno));            status = PCAP_ERROR;            goto fail;        }    }     handle->selectable_fd = handle->fd;       return status;...}     
static intactivate_new(pcap_t *handle){   struct pcap_linux *handlep = handle->priv;    const char  *device = handle->opt.source;    int         is_any_device = (strcmp(device, "any") == 0);    int         sock_fd = -1, arptype;    int         err = 0;    struct packet_mreq  mr;/*指定网口情况下用PF_PACKET协议通信得到原始以太网数据帧数据关于socket()函数,我个人认为可以将其理解为open()open()打开不同的文件,这样在返回的句柄里就可使用这个文件设备模块提供的opssocket()打开不同的协议,返回句柄里也包括了该协议的底层模块提供的ops. 只不过linux下面没法将网络协议当作普通文件(如/dev/xx)处理,所以才有了另一套socket特定的APIs*/    sock_fd = is_any_device ?        socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL)) :        socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));...    handlep->sock_packet = 0;    /*iface_get_id()使用ioctl(fd, SIOCGIFINDEX, &ifr)获取lo还回设备的索引值*/    handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf);        handle->offset   = 0;        if (!is_any_device) {        handlep->cooked = 0;            if (handle->opt.rfmon) {            err = enter_rfmon_mode(handle, sock_fd, device);            if (err < 0) {                close(sock_fd);                return err;            }            if (err == 0) {                close(sock_fd);                return PCAP_ERROR_RFMON_NOTSUP;            }            if (handlep->mondevice != NULL)                device = handlep->mondevice;        }/*iface_get_arptype()调用ioctl(fd, SIOCGIFHWADDR, &ifr)获取硬件类型 */        arptype = iface_get_arptype(sock_fd, device, handle->errbuf);        if (arptype < 0) {            close(sock_fd);            return arptype;        }        map_arphrd_to_dlt(handle, arptype, 1);  ...          
//获取指定设备的索引值        handlep->ifindex = iface_get_id(sock_fd, device, handle->errbuf);        if (handlep->ifindex == -1) {            close(sock_fd);            return PCAP_ERROR;/*iface_bind()将设备的索引值作为struct socketadd_ll的索引值与socket绑定    struct sockaddr_ll  sll;     sll.sll_family      = AF_PACKET;                                                                   sll.sll_ifindex     = ifindex;    sll.sll_protocol    = htons(ETH_P_ALL);bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1 */        if ((err = iface_bind(sock_fd, handlep->ifindex,handle->errbuf)) != 1) {                close(sock_fd);            if (err < 0)                return err;            else                return 0;   /* try old mechanism */        }...    }    if (!is_any_device && handle->opt.promisc) {        memset(&mr, 0, sizeof(mr));        mr.mr_ifindex = handlep->ifindex;        mr.mr_type    = PACKET_MR_PROMISC;        if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,            &mr, sizeof(mr)) == -1) {            snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,                "setsockopt: %s", pcap_strerror(errno));            close(sock_fd);            return PCAP_ERROR;        }    }    if (handlep->cooked) {        if (handle->snapshot < SLL_HDR_LEN + 1)            handle->snapshot = SLL_HDR_LEN + 1;    }    handle->bufsize = handle->snapshot;        //根据以太网链路层类型决定VLAN Tag在报文中的偏移值    switch (handle->linktype) {        case DLT_EN10MB:        handlep->vlan_offset = 2 * ETH_ALEN;        break;        case DLT_LINUX_SLL:        handlep->vlan_offset = 14;        break;        default:        handlep->vlan_offset = -1; /* unknown */        break;    }    //sock_fd作为pcap_tfd    handle->fd = sock_fd;...}至此,通过pcap_open_live完成全部准备阶段的内容, 之后就可以使用pcap_loop()来获取来自底层的数据并提交给callback函数进行应用处理, tcpdump.c pcap_loop的使用是: status = pcap_loop(pd, cnt, callback, pcap_userdata); //cnt 为指定捕获报文的个数libpcap/pcap.c里有pcap_loop的定义:    int pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user){   
    register int n;

    for (;;) {
        if (p->rfile != NULL) {
            //从文件里读取报文
            n = pcap_offline_read(p, cnt, callback, user);
        } else {
            //从指定网口读取报文
            do {
                //read_op在pcap_activate_linux()中被赋值为pcap_read_linux
                n = p->read_op(p, cnt, callback, user);
            } while (n == 0);
        }
        //n<=0时退出循环,退出pcap_loop
        if (n <= 0)
            return (n);
        //如果达到捕获报文个数,退出pcap_loop
        if (cnt > 0) {
            cnt -= n;
            if (cnt <= 0)
                return (0);
        }
    }
}

函数指针read_op指向的是pcap_read_linux(在pcap_activate_linux()中赋值),pcap_read_linux再调用pcap_read_packet完成一次报文的读取:

static int

pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata{     struct pcap_linux   *handlep = handle->priv;    u_char          *bp; struct sockaddr_ll  from;        if (handle->break_loop) {            handle->break_loop = 0;            return PCAP_ERROR_BREAK;        }                    fromlen = sizeof(from);//从socket接受信息存入bp指向的缓存区, 每次最大数据bufize,MSG_TRUNC为返回包的实际长度  packet_len = recvfrom((struct sockaddr *) &from, &fromlen);    } while (packet_len == -1 && errno == EINTR);...caplen = packet_len;    if (caplen > handle->snapshot)        caplen = handle->snapshot;//捕获报文时的信息    pcap_header.caplen  = caplen;                                                                     pcap_header.len     = packet_len;    handlep->packets_read++;                    //将数据内容bp交给函数指针callback指向的函数处理    callback(userdata, &pcap_header, bp);                    return 1;   }                Linux内核对recvfrm 的响应:net/socket.cSYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,        unsigned, flags, struct sockaddr __user *, addr,        int __user *, addr_len){ struct socket *sock;    struct iovec iov;    struct msghdr msg;    struct sockaddr_storage address;    int err, err2;    int fput_needed;                if (size > INT_MAX)        size = INT_MAX;    if (!sock)        goto out;                msg.msg_control = NULL;    msg.msg_controllen = 0;    msg.msg_iovlen = 1;    //将iov作为msg的缓存区数据结构,使得iov可以跟随这msg一起作为参数传递下去    msg.msg_iov = &iov;    iov.iov_base = ubuf;   //将msg_name指针指向address, 后面调用中,为msg_name赋值时address便得到赋值    msg.msg_name = (struct sockaddr *)&address;    msg.msg_namelen = sizeof(address);    if (sock->file->f_flags & O_NONBLOCK)        flags |= MSG_DONTWAIT;/*用户层的调用 packet_len = recvfrom(handle->fd, bp + offset,handle->bufsize - offset, MSG_TRUNC,            //对recvform()里from和fromlen的赋值,此时address已得到赋值    if (err >= 0 && addr != NULL) {        err2 = move_addr_to_user((struct sockaddr *)&address,                     msg.msg_namelen, addr, 
addr_len);        if (err2 < 0)            err = err2;    } ...}sock_revmsg()会调用sock里的函数指针集合ops里的recvmsg,这个函数指针在不同的模块下有不同的实现函数:int sock_recvmsg(struct socket *sock, struct msghdr *msg,...}       static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,                 struct msghdr *msg, size_t size, int flags)    int err;    struct sock_iocb *si = kiocb_to_siocb(iocb);              si->sock = sock;    si->scm = NULL;    si->msg = msg;    si->size = size;    si->flags = flags;              err = security_socket_recvmsg(sock, msg, size, flags);    if (err)        return err;              return sock->ops->recvmsg(iocb, sock, msg, size, flags);} 由于activate_new()里面建立了 PF_PACKET协议的socket, 所以,linux会调用建立PF_PACKET的底层模块af_packet来响应recvmsg。 在linux启动阶段,af_packet模块初始化完成后,会填充ops->recvmsg等函数指针,对上层/net/sock完成接口对接。net/packet/af_packet.cstatic int __init packet_init(void)    sock_register(&packet_family_ops);    register_pernet_subsys(&packet_net_ops);    register_netdevice_notifier(&packet_netdev_notifier);}static struct net_proto_family packet_family_ops = {//PF_PACKET即AF_PACKET,数值为17    .family =   PF_PACKET,    .create =   packet_create,    .owner  =   THIS_MODULE,};static int packet_create(struct net *net, struct socket *sock, int protocol)    sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);    if (sk == NULL)        goto out;                  //为socket的ops指针集合填充实现函数。完成接口对接。    sock->ops = &packet_ops; ...               
return 0;}              在packet_ops里有对struct sock的函数指针recvmsg填充实现函数packet_recvmsgstatic const struct proto_ops packet_ops = {    .family =   PF_PACKET,    .owner =    THIS_MODULE,    .release =  packet_release,    .bind =     packet_bind,    .connect =  sock_no_connect,    .socketpair =   sock_no_socketpair,    .accept =   sock_no_accept,    .getname =  packet_getname,    .poll =     packet_poll,    .ioctl =    packet_ioctl,    .listen =   sock_no_listen,    .shutdown = sock_no_shutdown,    .setsockopt =   packet_setsockopt,    .getsockopt =   packet_getsockopt,    .sendmsg =  packet_sendmsg,    .recvmsg =  packet_recvmsg,    .mmap =     packet_mmap,    .sendpage = sock_no_sendpage,};  packet_recvmsg 封装了接受报文并并将数据拷贝到用户层全部动作:static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,              struct msghdr *msg, size_t len, int flags){     struct sock *sk = sock->sk;    struct sk_buff *skb;    int copied, err;    struct sockaddr_ll *sll;   ...  //第一步,从skb接收队列里取得数据交给skb缓存    skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);    if (skb == NULL)                goto out;           ...    
copied = skb->len;    if (copied > len) {        copied = len;    }          //第二步, 将获取到的数据skb拷贝到iov里,即完成数据对用户层的传递    err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);    if (err)            goto out_free;                    sock_recv_timestamp(msg, sk, skb);            /*将skb里的cb拷贝给msg->msg_name, 这样在net/socket.c的move_addr_to_user((struct sockaddr *)&address,就可以将此msg_name 传给用户层 。*/        if (msg->msg_name)        memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,                    if (pkt_sk(sk)->auxdata) {        struct tpacket_auxdata aux;                        aux.tp_status = TP_STATUS_USER;        if (skb->ip_summed == CHECKSUM_PARTIAL)            aux.tp_status |= TP_STATUS_CSUMNOTREADY;        aux.tp_len = PACKET_SKB_CB(skb)->origlen;        aux.tp_snaplen = skb->len;        aux.tp_mac = 0;        aux.tp_net = skb_network_offset(skb);        aux.tp_vlan_tci = skb->vlan_tci;        put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);    err = (flags&MSG_TRUNC) ? skb->len : copied;...     return err;}net/core/datagram.cstruct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,                  int noblock, int *err)    int peeked;    return __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0),                                                                                   &peeked, err);}                           __skb_recv_datagram的作用就是接收一个数据报缓存的数据结构,本文的分析就到__skb_recv_datagram从sk->sk_receive_queue 中取得skb结构数据为止,至于这个接收队列是由谁建立的,发送端在哪里,后续介绍。       struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,                    int *peeked, int *err)    struct sk_buff *skb;    long timeo;    int error = sock_error(sk);        if (error)        goto no_packet;        do {        unsigned long cpu_flags;        //保证进程动作唯一,上spin锁           spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);         //查看skb的*next指针时候有值,即是否有报文来到,有的话返回指针,没有返回NULL        skb = skb_peek(&sk->sk_receive_queue);        if (skb) {            *peeked = skb->peeked;            if (flags & MSG_PEEK) {                skb->peeked = 1;            } else//如果不是MSG_PEEK(查看动作)的话,那么在sk的接收队列中后移skb,即操作新的skb                __skb_unlink(skb, &sk->sk_receive_queue);        }       //解spin锁        spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);       //有数据的话返回数据的缓存        if (skb)            return skb;   /*如果peek时没有数据到到,在阻塞情况下,等待一定时间,当达到超时时间还没有接收到数据,向err传送错误类型报告,退出本函数; 在非阻塞情况下,timeo为0,直接报错后退出*/        error = -EAGAIN;        if (!timeo)            goto no_packet;     //按照timeo的数值阻塞本进程,在timeo时间内持续执行do...while    } while (!wait_for_packet(sk, err, &timeo));       return NULL;   no_packet:    *err = error;    return NULL;}接收到skb后,调用skb_copy_datagram_iovec 将其拷贝到msg的iov里struct iovec{      void __user *iov_base;  //缓存的首地址    __kernel_size_t iov_len; //缓存可用的大小};  int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,                struct iovec *to, int len){      //报文头部长度    int start = skb_headlen(skb);    int i, copy = start – offset;    struct sk_buff *frag_iter;       trace_skb_copy_datagram_iovec(skb, len);       //复制报文头部    if (copy > 0) {        if (copy > len)            copy = len;        //将skb的copy长度(报文头部)的数据缓存复制到iov里,完成对用户层数据的传递        
if (memcpy_toiovec(to, skb->data + offset, copy))            goto fault;        if ((len -= copy) == 0)            return 0;        offset += copy;}...         int end;            WARN_ON(start > offset + len);            end = start + frag_iter->len;        if ((copy = end - offset) > 0) {            if (copy > len)                copy = len;           //递归调用skb_copy_datagram_iovec,offset-start表示当前分片报文的长度            if (skb_copy_datagram_iovec(frag_iter,t,                goto fault;            if ((len -= copy) == 0)                return 0;            offset += copy;        }        start = end;    }    if (!len)        return 0; fault:    return -EFAULT;}int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len){           while (len > 0) {         if (iov->iov_len) {/*如果iov的iov_len大于len, 说明iov的缓存区还可以接受数据,那么设置本次拷贝大小为len  //将kdata拷贝到iov的base地址,长度为len,即将数据拷贝到用户层            if (copy_to_user(iov->iov_base, kdata, copy))                return -EFAULT;//每次拷贝后,kdata地址后移copy长度            kdata += copy;            len -= copy;//每次拷贝后, 将iov_len减去已经使用的长度            iov->iov_len -= copy;  //每次拷贝后,移动iov的base地址            iov->iov_base += copy;        }         iov++;                  }    return 0;}总结:pcap_open_live 调用pcap_create()来为pcap_t填充read_op等函数指针,并提供了激活函数pcap_activate_linux,建立了socket与linux底层模块af_packet通信。 pcap_loop 调用了read_op的实现函数 pcap_read_linux, pcap_read_linux 里面使用了recvfrom 获取以太网原始数据,linux的af_packet模块会响应并完成recvfrom动作;recvfrom完成后调用callback指向的函数处理这些数据,callback指针的赋值是在tcpdump里根据具体链路层环境赋值的。欢迎大家交流,不足之处请不吝指正,给予批评!
原创粉丝点击