由pcap文件提取IPv6的flow数据

来源:互联网 发布:淘宝上aj正品店铺 编辑:程序博客网 时间:2024/04/30 07:24

一、目标:

现有两个大学抓取的packet数据,分片成若干部分,需要从中抽取出流信息( 源mac地址、目的mac地址、源ip地址、目的IP地址、源端口、目的端口、vlan、协议类型、流首包时间、流末包时间、流总大小),并按文件中的packet顺序,将结果输出出来。

二、实现过程:

1、没有沟通好需求是个硬伤,直接导致前面几次提交结果不合格。
2、pcap是二进制文件,花了点时间研究pcap的文件结构和二进制文件的读写方式。
3、数据量非常大,第一组数据packet数约一千万,第二组有一亿以上,对内存分配是个考验。
4、一开始采用了顺序查找,速度非常慢,运行到后期,10秒只能遍历100组数据。在老师提示下采用了hash表来存储,速度果然不同凡响。
5、hash表关键字的选择:
(1)初期:观察了packet数据,发现源ip地址(SrcIP)不一样的情况比较多,因此哈希函数和关键字都直接用它了。冲突解决采用线性探测(每次冲突后下标加1)。
(2)中期:初期的冲突还是太多了,到后面速度异常慢,因此哈希函数和关键字都改成了源mac地址、目的mac地址、源ip地址、目的IP地址、源端口、目的端口、vlan、协议类型的和(当然还要对hash表长取模),这么一来冲突小了不少。然而运行至文件末时速度仍然不够。
(3)后期:导师提示了md5算法,即Message Digest Algorithm MD5(消息摘要算法第五版),它的特点是:
①、压缩性:任意长度的数据,算出的MD5值长度都是固定的。
②、容易计算:从原数据计算出MD5值很容易。
③、抗修改性:对原数据进行任何改动,哪怕只修改1个字节,所得到的MD5值都有很大区别。
④、弱抗碰撞:已知原数据和其MD5值,想找到一个具有相同MD5值的数据(即伪造数据)是非常困难的。
⑤、强抗碰撞:想找到两个不同的数据,使它们具有相同的MD5值,是非常困难的。
这就意味着用md5算法作为hash函数可以大大减少冲突(实际情况是基本不出现冲突),毕竟不同关键字算出来的值差异太大了(但并没有直接验证,只是通过运行时间间接得出的结论)。
6、运行时间:一百万个flow大概200s,这比顺序查找简直快了四五个量级。

三、代码

(运行环境为Visual Studio 2012)

主代码如下
#include<stdio.h>#include<stdlib.h>#include<string.h>#include "md5.h"#include<math.h>#include<time.h>#define N 13000000LL#define NUM_OF_MAX_FLOW 13000000LLstatic long long index[NUM_OF_MAX_FLOW]={0},FlowCnt=1;typedef unsigned char uint8;typedef struct{    int TimeStart;    int MicroSec;    int Caplen;    uint8 SrcMac[6];    uint8 DstMac[6];    uint8 SrcIP[4];    uint8 DstIP[4];    uint8 SrcPort[2];    uint8 DstPort[2];    uint8 VlanID[2];    uint8 VlanType[2];    uint8 Protocol;    int FlowBytes;}STREAM;typedef struct{    uint8 flag;    double timeStart;    double timeEnd;    uint8 SrcMac[6];    uint8 DstMac[6];    uint8 SrcIP[4];    uint8 DstIP[4];    uint8 SrcPort[2];    uint8 DstPort[2];    uint8 VlanID[2];    uint8 VlanType[2];    uint8 Protocol;    unsigned long FlowBytes;    unsigned long PacketsNum;}FLOW;static FLOW FlowTable[NUM_OF_MAX_FLOW] = { 0 };void writeHex(FILE *fw, uint8 *str, int n){    int i;    for (i = 0; i<n; i++){        fprintf(fw, "%x%x", (str[i] - str[i] % 16) >> 4, str[i] % 16);    }    return;}int isEqual(uint8 *old,uint8 *now,int n){    int i;    for(i=0;i<n;i++)       if(old[i]!=now[i]) return 0;    return 1;}int Equal(STREAM data,FLOW FlowData){    if(isEqual(data.SrcMac,FlowData.SrcMac,6))      if(isEqual(data.DstMac,FlowData.DstMac,6))        if(isEqual(data.SrcIP,FlowData.SrcIP,4))          if(isEqual(data.DstIP,FlowData.DstIP,4))            if(isEqual(data.SrcPort,FlowData.SrcPort,2))              if(isEqual(data.DstPort,FlowData.DstPort,2))                if(isEqual(data.VlanID,FlowData.VlanID,2))                  if(isEqual(data.VlanType,FlowData.VlanType,2))                    if(data.Protocol==FlowData.Protocol)                      return 1;    return 0;}long long string2num(uint8 *IP){    long long temp = (long long)IP[3];    temp += (long long)IP[2] << 8;    temp += (long long)IP[1] << 16;    temp += (long long)IP[0] << 24;     return temp;}int add2Hash(STREAM data,FLOW *FlowTable,long long HashNum){     int i,flag=0,cnt=0;  
   double temp; loop:     if(cnt++>100000) return 0;     if(FlowTable[HashNum].flag==0){           FlowTable[HashNum].flag=1;           FlowTable[HashNum].timeStart=data.TimeStart+(int)data.MicroSec/1000+(data.MicroSec%1000)/1000.0;           FlowTable[HashNum].timeEnd=FlowTable[HashNum].timeStart;           for(i=0;i<6;i++) FlowTable[HashNum].SrcMac[i]=data.SrcMac[i];           for(i=0;i<6;i++) FlowTable[HashNum].DstMac[i]=data.DstMac[i];           for(i=0;i<4;i++) FlowTable[HashNum].SrcIP[i]=data.SrcIP[i];           for(i=0;i<4;i++) FlowTable[HashNum].DstIP[i]=data.DstIP[i];           for(i=0;i<2;i++) FlowTable[HashNum].SrcPort[i]=data.SrcPort[i];           for(i=0;i<2;i++) FlowTable[HashNum].DstPort[i]=data.DstPort[i];           for(i=0;i<2;i++) FlowTable[HashNum].VlanID[i]=data.VlanID[i];           for(i=0;i<2;i++) FlowTable[HashNum].VlanType[i]=data.VlanType[i];           FlowTable[HashNum].Protocol=data.Protocol;           FlowTable[HashNum].FlowBytes=data.FlowBytes;           FlowTable[HashNum].PacketsNum=1;           index[FlowCnt++]=HashNum;           return 1;     }     else{//????            
if(Equal(data,FlowTable[HashNum]))           {//???????flow               temp=data.TimeStart+(int)data.MicroSec/1000+(data.MicroSec%1000)/1000.0;              if (fabs(temp - FlowTable[HashNum].timeEnd)<5 && temp>FlowTable[HashNum].timeEnd){//?????????flow                  FlowTable[HashNum].timeEnd=temp;                 FlowTable[HashNum].FlowBytes += data.FlowBytes;                 FlowTable[HashNum].PacketsNum++;                 return 1;              }              else{                   /*HashNum=(HashNum*HashNum)%NUM_OF_MAX_FLOW;                   add2Hash(data,FlowTable,HashNum);//????????flow?*/                      HashNum = (HashNum + 1) % NUM_OF_MAX_FLOW;                    while (FlowTable[HashNum].flag == 1){                      if (Equal(data, FlowTable[HashNum])){                          temp = data.TimeStart + (int)data.MicroSec / 1000 + (data.MicroSec % 1000) / 1000.0;                          if (fabs(temp - FlowTable[HashNum].timeEnd) < 5 && temp>FlowTable[HashNum].timeEnd){//?????????flow                               FlowTable[HashNum].timeEnd = temp;                              FlowTable[HashNum].FlowBytes += data.FlowBytes;                              FlowTable[HashNum].PacketsNum++;                              flag = 1;                              return 1;                          }                          else                              HashNum = (HashNum + 1) % NUM_OF_MAX_FLOW;                      }                      else                          HashNum = (HashNum + 1) % NUM_OF_MAX_FLOW;                    }                    if (flag==0){//?????                       
FlowTable[HashNum].flag = 1;                      FlowTable[HashNum].timeStart = data.TimeStart + (int)data.MicroSec / 1000 + (data.MicroSec % 1000) / 1000.0;                      FlowTable[HashNum].timeEnd = FlowTable[HashNum].timeStart;                      for (i = 0; i<6; i++) FlowTable[HashNum].SrcMac[i] = data.SrcMac[i];                      for (i = 0; i<6; i++) FlowTable[HashNum].DstMac[i] = data.DstMac[i];                      for (i = 0; i<4; i++) FlowTable[HashNum].SrcIP[i] = data.SrcIP[i];                      for (i = 0; i<4; i++) FlowTable[HashNum].DstIP[i] = data.DstIP[i];                      for (i = 0; i<2; i++) FlowTable[HashNum].SrcPort[i] = data.SrcPort[i];                      for (i = 0; i<2; i++) FlowTable[HashNum].DstPort[i] = data.DstPort[i];                      for (i = 0; i<2; i++) FlowTable[HashNum].VlanID[i] = data.VlanID[i];                      for (i = 0; i<2; i++) FlowTable[HashNum].VlanType[i] = data.VlanType[i];                      FlowTable[HashNum].Protocol = data.Protocol;                      FlowTable[HashNum].FlowBytes = data.FlowBytes;                      FlowTable[HashNum].PacketsNum = 1;                      index[FlowCnt++] = HashNum;                      return 1;                  }              }           }           else            {                   /*HashNum=(HashNum*HashNum)%NUM_OF_MAX_FLOW;                   add2Hash(data,FlowTable,HashNum);//????????flow? 
*/               HashNum = (HashNum+1) % NUM_OF_MAX_FLOW;                      goto loop;           }     }     return 1;}void print2file(FILE *fw,FLOW *FlowTable){     long long i,j,k;     for(j=0;j<FlowCnt;j++){        i=index[j];        {                        if (FlowTable[i].timeStart == FlowTable[i - 1].timeStart&&FlowTable[i].timeEnd == FlowTable[i - 1].timeEnd)                             break;                        writeHex(fw, FlowTable[i].SrcMac, 6);                        fprintf(fw," ");                        writeHex(fw, FlowTable[i].DstMac, 6);                        fprintf(fw,"\t");                        for (k = 0; k < 3;k++)                            fprintf(fw, "%d.", FlowTable[i].SrcIP[k]);                        fprintf(fw, "%d\t", FlowTable[i].SrcIP[3]);                        for (k = 0; k < 3; k++)                            fprintf(fw, "%d.", FlowTable[i].DstIP[k]);                        fprintf(fw, "%d\t", FlowTable[i].DstIP[3]);                        writeHex(fw, FlowTable[i].SrcPort, 2);                        fprintf(fw," ");                        writeHex(fw, FlowTable[i].DstPort, 2);                        fprintf(fw," ");                        writeHex(fw, FlowTable[i].VlanType, 2);                        fprintf(fw," ");                        writeHex(fw, FlowTable[i].VlanID, 2);                        fprintf(fw," ");                        fprintf(fw, "%d", FlowTable[i].Protocol);                        fprintf(fw,"\t%.3lf",FlowTable[i].timeStart);                        fprintf(fw,"\t%.3lf",FlowTable[i].timeEnd);                        fprintf(fw, "\t%10.3lf", FlowTable[i].timeEnd - FlowTable[i].timeStart);                        //fprintf(fw,"\t%ld",FlowTable[i].FlowBytes);                        fprintf(fw, " ");                        fprintf(fw, "%8lu", FlowTable[i].FlowBytes);                         fprintf(fw, "\t%ld\n", FlowTable[i].PacketsNum);        }     }}void myfun(FILE *fp,FILE *fw){    long long num = 
1, HashNum=0,i;    int flag=1;    STREAM data;    uint8 result[16] = { 0 },temp[4];    fseek(fp, 24, SEEK_SET);//???????    do{    //////////////////////////////////////////        fread(&data.TimeStart, sizeof(int), 1, fp);        fread(&data.MicroSec, sizeof(int), 1, fp);        fread(&data.Caplen, sizeof(int), 1, fp);        fread(&data.FlowBytes, sizeof(int), 1, fp);        fread(data.DstMac, sizeof(uint8), 6, fp);//?DstMac         fread(data.SrcMac, sizeof(uint8), 6, fp);        fseek(fp, 2, SEEK_CUR);        fread(data.VlanID, sizeof(uint8), 2, fp);        fread(data.VlanType, sizeof(uint8), 2, fp);        fseek(fp, 9, SEEK_CUR);        fread(&data.Protocol, sizeof(uint8), 1, fp);        fseek(fp, 2, SEEK_CUR);        fread(data.SrcIP, sizeof(uint8), 4, fp);        fread(data.DstIP, sizeof(uint8), 4, fp);        fread(data.SrcPort, sizeof(uint8), 2, fp);        fread(data.DstPort, sizeof(uint8), 2, fp);        //////////////////////////////////////////        for (i = 0; i < 4; i++){            temp[i] = data.SrcMac[i] + data.DstMac[i];        }        for (i = 0; i < 4; i++){            temp[i] += data.SrcIP[i] + data.DstIP[i];        }        for (i = 0; i < 2; i++){            temp[i] += data.SrcPort[i] + data.DstPort[i] + data.VlanID[i] + data.VlanType[i] + data.Protocol;        }        ZEN_LIB::md5(temp, 4, result);        HashNum = string2num(result)%NUM_OF_MAX_FLOW;        /////////////////////        flag=add2Hash(data,FlowTable,abs(HashNum));        //index[num-1]=HashNum;        ///////////////////        fseek(fp, data.Caplen - 42, SEEK_CUR);    } while (num++<N&&flag==1);    print2file(fw,FlowTable);    fclose(fp);    fclose(fw);}int main(){    FILE *fp, *fw, *ftime;    long long j;    int i;    double start, finish;    char str[15]="univ2_pt2",temp[30]="result2.txt";    start = clock();//取开始时间    for (j = 0; j < NUM_OF_MAX_FLOW; j++) index[j] = 0;    for (j = 0; j < NUM_OF_MAX_FLOW; j++) FlowTable[j].flag = 0;    for(i=3;i<=7;i++){        
FlowCnt = 0;       str[8]=i+'0';       temp[6]=i+'0';       fp = fopen(str, "rb");       fw = fopen(temp, "w+");       myfun(fp,fw);    }    finish = clock();//取结束时间    ftime = fopen("time.txt", "w+");    fprintf(ftime,"%f seconds\n", (finish - start) / CLOCKS_PER_SEC);//以秒为单位显示之    putchar('\a');    putchar('\a');    //system("pause");}
其中,MD5函数如下(系转载,很抱歉来源忘记了,若有知情者请告知我补上)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>

// Byte-order selection (little-endian vs. big-endian).
#define ZEN_LITTLE_ENDIAN  0x0123
#define ZEN_BIG_ENDIAN     0x3210

// Everything here has so far only served little-endian machines; the
// big-endian paths are kept but little-endian is the default.
#ifndef ZEN_BYTES_ORDER
#define ZEN_BYTES_ORDER    ZEN_LITTLE_ENDIAN
#endif

#ifndef ZEN_SWAP_UINT16
#define ZEN_SWAP_UINT16(x)  ((((x) & 0xff00) >>  8) | (((x) & 0x00ff) <<  8))
#endif
#ifndef ZEN_SWAP_UINT32
#define ZEN_SWAP_UINT32(x)  ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
     (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
#endif
#ifndef ZEN_SWAP_UINT64
#define ZEN_SWAP_UINT64(x)  ((((x) & 0xff00000000000000) >> 56) | (((x) & 0x00ff000000000000) >>  40) | \
     (((x) & 0x0000ff0000000000) >> 24) | (((x) & 0x000000ff00000000) >>  8) | \
     (((x) & 0x00000000ff000000) << 8 ) | (((x) & 0x0000000000ff0000) <<  24) | \
     (((x) & 0x000000000000ff00) << 40 ) | (((x) & 0x00000000000000ff) <<  56))
#endif

/*!
@brief      Copy a byte array into another buffer, reversing the byte order of
            every 32-bit word.
@return     to
@param[out] to     destination; when length is not a multiple of 4 the copy is
                   zero-padded up to the next multiple of 4, so the
                   destination must have room for the rounded-up length
@param[in]  from   source bytes
@param[in]  length number of source bytes
*/
void *swap_uint32_memcpy(void *to, const void *from, size_t length)
{
    unsigned char *dst = static_cast<unsigned char *>(to);
    memcpy(dst, from, length);

    // Pad with zeros when the data is not a multiple of 4 bytes.
    size_t remain_len = (4 - (length & 3)) & 3;
    if (remain_len)
    {
        memset(dst + length, 0, remain_len);
        length += remain_len;
    }

    // Reverse every word. Going through memcpy keeps the accesses valid for
    // any destination alignment.
    for (size_t i = 0; i < length / 4; ++i)
    {
        uint32_t word;
        memcpy(&word, dst + 4 * i, sizeof word);
        word = ZEN_SWAP_UINT32(word);
        memcpy(dst + 4 * i, &word, sizeof word);
    }
    return to;
}

/// Length in bytes of an MD5 digest
static const size_t ZEN_MD5_HASH_SIZE = 16;
/// Length in bytes of a SHA1 digest
static const size_t ZEN_SHA1_HASH_SIZE = 20;

namespace ZEN_LIB
{
    /*!
    @brief      Compute the MD5 digest of a memory block.
    @return     result
    @param[in]  buf    memory block to digest
    @param[in]  size   length of the block in bytes
    @param[out] result receives the 16 digest bytes
    */
    unsigned char *md5(const unsigned char *buf,
        size_t size,
        unsigned char result[ZEN_MD5_HASH_SIZE]);

    /*!
    @brief      Compute the SHA1 digest of a memory block.
                Declaration only; no definition accompanies it here.
    @return     result
    @param[in]  buf    memory block to digest
    @param[in]  size   length of the block in bytes
    @param[out] result receives the 20 digest bytes
    */
    unsigned char *sha1(const unsigned char *buf,
        size_t size,
        unsigned char result[ZEN_SHA1_HASH_SIZE]);
}

//================================================================================================
// MD5

// Size in bytes of the block processed per step
static const size_t ZEN_MD5_BLOCK_SIZE = 64;

// MD5 context: running state and intermediate result
typedef struct md5_ctx
{
    // total length of the data, in bytes
    uint64_t length_;
    // number of trailing bytes not yet processed
    uint64_t unprocessed_;
    // hash state (intermediate result)
    uint32_t  hash_[4];
} md5_ctx;

// Rotations; n must satisfy 0 < n < width.
#define ROTL32(dword, n) (((dword) << (n)) | ((dword) >> (32 - (n))))
#define ROTR32(dword, n) (((dword) >> (n)) | ((dword) << (32 - (n))))
#define ROTL64(qword, n) (((qword) << (n)) | ((qword) >> (64 - (n))))
#define ROTR64(qword, n) (((qword) >> (n)) | ((qword) << (64 - (n))))

/*!
@brief      Internal: reset an MD5 context to its initial state.
@param      ctx context to initialise
*/
static void zen_md5_init(md5_ctx *ctx)
{
    ctx->length_ = 0;
    ctx->unprocessed_ = 0;
    /* initialize state */
    ctx->hash_[0] = 0x67452301;
    ctx->hash_[1] = 0xefcdab89;
    ctx->hash_[2] = 0x98badcfe;
    ctx->hash_[3] = 0x10325476;
}

/* Four auxiliary functions, each taking three 32-bit words and producing one
 * 32-bit word.
 * F(x,y,z) = ((y XOR z) AND x) XOR z is a faster form of the textbook F. */
#define MD5_F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#define MD5_G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
#define MD5_H(x, y, z) ((x) ^ (y) ^ (z))
#define MD5_I(x, y, z) ((y) ^ ((x) | ~(z)))

/* Transformations for rounds 1, 2, 3, and 4. */
#define MD5_ROUND1(a, b, c, d, x, s, ac) do { \
         (a) += MD5_F((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     } while (0)
#define MD5_ROUND2(a, b, c, d, x, s, ac) do { \
         (a) += MD5_G((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     } while (0)
#define MD5_ROUND3(a, b, c, d, x, s, ac) do { \
         (a) += MD5_H((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     } while (0)
#define MD5_ROUND4(a, b, c, d, x, s, ac) do { \
         (a) += MD5_I((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     } while (0)

/*!
@brief      Internal: digest one 64-byte block (16 uint32_t words) into the
            state. The words are consumed as little-endian values.
@param      state hash state, updated in place
@param      block the 64-byte block to process, as 16 uint32_t words
*/
static void zen_md5_process_block(uint32_t state[4], const uint32_t block[ZEN_MD5_BLOCK_SIZE / 4])
{
    // Exactly 32-bit working variables, so the additions wrap modulo 2^32.
    uint32_t a = state[0];
    uint32_t b = state[1];
    uint32_t c = state[2];
    uint32_t d = state[3];

    const uint32_t *x = NULL;

    // MD5 computes on little-endian words; big-endian hosts convert first.
#if ZEN_BYTES_ORDER == ZEN_LITTLE_ENDIAN
    x = block;
#else
    uint32_t swap_block[ZEN_MD5_BLOCK_SIZE / 4];
    swap_uint32_memcpy(swap_block, block, 64);
    x = swap_block;
#endif

    MD5_ROUND1(a, b, c, d, x[0], 7, 0xd76aa478);
    MD5_ROUND1(d, a, b, c, x[1], 12, 0xe8c7b756);
    MD5_ROUND1(c, d, a, b, x[2], 17, 0x242070db);
    MD5_ROUND1(b, c, d, a, x[3], 22, 0xc1bdceee);
    MD5_ROUND1(a, b, c, d, x[4], 7, 0xf57c0faf);
    MD5_ROUND1(d, a, b, c, x[5], 12, 0x4787c62a);
    MD5_ROUND1(c, d, a, b, x[6], 17, 0xa8304613);
    MD5_ROUND1(b, c, d, a, x[7], 22, 0xfd469501);
    MD5_ROUND1(a, b, c, d, x[8], 7, 0x698098d8);
    MD5_ROUND1(d, a, b, c, x[9], 12, 0x8b44f7af);
    MD5_ROUND1(c, d, a, b, x[10], 17, 0xffff5bb1);
    MD5_ROUND1(b, c, d, a, x[11], 22, 0x895cd7be);
    MD5_ROUND1(a, b, c, d, x[12], 7, 0x6b901122);
    MD5_ROUND1(d, a, b, c, x[13], 12, 0xfd987193);
    MD5_ROUND1(c, d, a, b, x[14], 17, 0xa679438e);
    MD5_ROUND1(b, c, d, a, x[15], 22, 0x49b40821);

    MD5_ROUND2(a, b, c, d, x[1], 5, 0xf61e2562);
    MD5_ROUND2(d, a, b, c, x[6], 9, 0xc040b340);
    MD5_ROUND2(c, d, a, b, x[11], 14, 0x265e5a51);
    MD5_ROUND2(b, c, d, a, x[0], 20, 0xe9b6c7aa);
    MD5_ROUND2(a, b, c, d, x[5], 5, 0xd62f105d);
    MD5_ROUND2(d, a, b, c, x[10], 9, 0x2441453);
    MD5_ROUND2(c, d, a, b, x[15], 14, 0xd8a1e681);
    MD5_ROUND2(b, c, d, a, x[4], 20, 0xe7d3fbc8);
    MD5_ROUND2(a, b, c, d, x[9], 5, 0x21e1cde6);
    MD5_ROUND2(d, a, b, c, x[14], 9, 0xc33707d6);
    MD5_ROUND2(c, d, a, b, x[3], 14, 0xf4d50d87);
    MD5_ROUND2(b, c, d, a, x[8], 20, 0x455a14ed);
    MD5_ROUND2(a, b, c, d, x[13], 5, 0xa9e3e905);
    MD5_ROUND2(d, a, b, c, x[2], 9, 0xfcefa3f8);
    MD5_ROUND2(c, d, a, b, x[7], 14, 0x676f02d9);
    MD5_ROUND2(b, c, d, a, x[12], 20, 0x8d2a4c8a);

    MD5_ROUND3(a, b, c, d, x[5], 4, 0xfffa3942);
    MD5_ROUND3(d, a, b, c, x[8], 11, 0x8771f681);
    MD5_ROUND3(c, d, a, b, x[11], 16, 0x6d9d6122);
    MD5_ROUND3(b, c, d, a, x[14], 23, 0xfde5380c);
    MD5_ROUND3(a, b, c, d, x[1], 4, 0xa4beea44);
    MD5_ROUND3(d, a, b, c, x[4], 11, 0x4bdecfa9);
    MD5_ROUND3(c, d, a, b, x[7], 16, 0xf6bb4b60);
    MD5_ROUND3(b, c, d, a, x[10], 23, 0xbebfbc70);
    MD5_ROUND3(a, b, c, d, x[13], 4, 0x289b7ec6);
    MD5_ROUND3(d, a, b, c, x[0], 11, 0xeaa127fa);
    MD5_ROUND3(c, d, a, b, x[3], 16, 0xd4ef3085);
    MD5_ROUND3(b, c, d, a, x[6], 23, 0x4881d05);
    MD5_ROUND3(a, b, c, d, x[9], 4, 0xd9d4d039);
    MD5_ROUND3(d, a, b, c, x[12], 11, 0xe6db99e5);
    MD5_ROUND3(c, d, a, b, x[15], 16, 0x1fa27cf8);
    MD5_ROUND3(b, c, d, a, x[2], 23, 0xc4ac5665);

    MD5_ROUND4(a, b, c, d, x[0], 6, 0xf4292244);
    MD5_ROUND4(d, a, b, c, x[7], 10, 0x432aff97);
    MD5_ROUND4(c, d, a, b, x[14], 15, 0xab9423a7);
    MD5_ROUND4(b, c, d, a, x[5], 21, 0xfc93a039);
    MD5_ROUND4(a, b, c, d, x[12], 6, 0x655b59c3);
    MD5_ROUND4(d, a, b, c, x[3], 10, 0x8f0ccc92);
    MD5_ROUND4(c, d, a, b, x[10], 15, 0xffeff47d);
    MD5_ROUND4(b, c, d, a, x[1], 21, 0x85845dd1);
    MD5_ROUND4(a, b, c, d, x[8], 6, 0x6fa87e4f);
    MD5_ROUND4(d, a, b, c, x[15], 10, 0xfe2ce6e0);
    MD5_ROUND4(c, d, a, b, x[6], 15, 0xa3014314);
    MD5_ROUND4(b, c, d, a, x[13], 21, 0x4e0811a1);
    MD5_ROUND4(a, b, c, d, x[4], 6, 0xf7537e82);
    MD5_ROUND4(d, a, b, c, x[11], 10, 0xbd3af235);
    MD5_ROUND4(c, d, a, b, x[2], 15, 0x2ad7d2bb);
    MD5_ROUND4(b, c, d, a, x[9], 21, 0xeb86d391);

    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
}

/*!
@brief      Internal: process the leading part of the data, one complete
            64-byte block at a time; the remainder (< 64 bytes) is left for
            zen_md5_final.
@param[out] ctx  context recording the running state and result
@param[in]  buf  data to process
@param[in]  size length of the data in bytes
*/
static void zen_md5_update(md5_ctx *ctx, const unsigned char *buf, size_t size)
{
    // += rather than =: the function may be called several times, provided
    // every call but the last leaves nothing unprocessed.
    ctx->length_ += size;

    // Each block is staged in a uint32_t array, so buf may have any alignment.
    while (size >= ZEN_MD5_BLOCK_SIZE)
    {
        uint32_t block[ZEN_MD5_BLOCK_SIZE / 4];
        memcpy(block, buf, ZEN_MD5_BLOCK_SIZE);
        zen_md5_process_block(ctx->hash_, block);
        buf += ZEN_MD5_BLOCK_SIZE;
        size -= ZEN_MD5_BLOCK_SIZE;
    }

    ctx->unprocessed_ = size;
}

/*!
@brief      Internal: process the tail of the data. Builds the last one (or
            two) blocks from the unprocessed bytes, the 0x80 marker, zero
            padding and the bit length, then stores the digest.
@param[in]  ctx    context recording the running state and result
@param[in]  buf    the same data that was given to zen_md5_update
@param[in]  size   length of buf in bytes
@param[out] result receives the 16 digest bytes
*/
static void zen_md5_final(md5_ctx *ctx, const unsigned char *buf, size_t size, unsigned char *result)
{
    // Zero-initialised so that no word is ever read while indeterminate.
    uint32_t message[ZEN_MD5_BLOCK_SIZE / 4] = { 0 };

    // Stage the leftover bytes; zen_md5_update guarantees fewer than 64.
    if (ctx->unprocessed_)
    {
        memcpy(message, buf + size - ctx->unprocessed_, static_cast<size_t>(ctx->unprocessed_));
    }

    // Word and bit position at which the 0x80 marker goes.
    uint32_t word = ((uint32_t)ctx->length_ & 63) >> 2;
    uint32_t shift = ((uint32_t)ctx->length_ & 3) * 8;

    // Insert 0x80 and clear the bytes after it. The constant is unsigned so
    // that a shift of 24 does not overflow a signed int.
    message[word] &= ~(0xFFFFFFFFu << shift);
    message[word++] ^= 0x80u << shift;

    // If this block cannot also hold the 64-bit length, flush it first.
    if (word > 14)
    {
        while (word < 16)
        {
            message[word++] = 0;
        }
        zen_md5_process_block(ctx->hash_, message);
        word = 0;
    }

    // Zero padding
    while (word < 14)
    {
        message[word++] = 0;
    }

    // The stored length is counted in bits, not bytes.
    uint64_t data_len = (ctx->length_) << 3;

    // MD5 stores the 64-bit length little-endian; note the != below.
#if ZEN_BYTES_ORDER != ZEN_LITTLE_ENDIAN
    data_len = ZEN_SWAP_UINT64(data_len);
#endif

    message[14] = (uint32_t)(data_len & 0x00000000FFFFFFFF);
    message[15] = (uint32_t)((data_len & 0xFFFFFFFF00000000ULL) >> 32);

    zen_md5_process_block(ctx->hash_, message);

    // The digest is little-endian; big-endian hosts convert it.
#if ZEN_BYTES_ORDER == ZEN_LITTLE_ENDIAN
    memcpy(result, &ctx->hash_, ZEN_MD5_HASH_SIZE);
#else
    swap_uint32_memcpy(result, &ctx->hash_, ZEN_MD5_HASH_SIZE);
#endif
}

// Compute the MD5 digest of a memory block; result must not be NULL.
unsigned char *ZEN_LIB::md5(const unsigned char *buf,
    size_t size,
    unsigned char result[ZEN_MD5_HASH_SIZE])
{
    assert(result != NULL);

    md5_ctx ctx;
    zen_md5_init(&ctx);
    zen_md5_update(&ctx, buf, size);
    zen_md5_final(&ctx, buf, size, result);
    return result;
}

四、结果

最终提取出来的结果示例如下,其中,排列格式为(从左至右):
源mac地址、目的mac地址、源ip地址、目的IP地址、源端口、目的端口、vlan类型、vlan ID、协议类型、流首包时间、流末包时间、流持续时间、流总大小、包数

0030488b0be8 00000c07ac00   244.157.209.31  27.88.27.89 0022 e984 0038 4500 157 1264194698.490  1264194698.490       0.000       70 1
0009e9b6e18a 01005e000002   210.218.218.164 7.193.7.193 001c 6679 0030 45c0 157 1264194698.836  1264194698.836       0.000       62 1
0009e97fd60a 0030488b0be8   244.157.82.156  27.89.27.88 0022 a1ea 005d 4500 157 1264194698.845  1264194698.845       0.000      107 1
0009e97fd60a 003048859c18   244.157.82.106  27.89.27.88 0022 3a82 00ec 4500 157 1264194698.887  1264194698.887       0.000      250 1
0009e9b6e18a 01005e000002   210.218.218.164 7.193.7.193 001c 167a 0030 45c0 157 1264194698.936  1264194698.936       0.000       62 1
003048859c18 00000c07ac00   244.157.209.31  27.88.27.89 0022 e50a 005e 4500 157 1264194698.986  1264194698.986       0.000      108 1
0009e97fd60a 003048859c18   244.157.82.106  27.89.27.88 0022 ca8f 005d 4500 157 1264194699.335  1264194699.335       0.000      107 1
003048859c18 00000c07ac00   244.157.209.31  27.88.27.89 0022 0b70 0038 4500 157 1264194699.734  1264194699.734       0.000       70 1
0009e97fd60a 003048859c18   244.157.82.106  27.89.27.88 0022 cb4a 005d 4500 157 1264194700.084  1264194700.084       0.000      107 1
0009e97fd60a 0030488b0cf6   244.157.82.176  27.89.27.88 0022 d07f 0080 4500 157 1264194700.088  1264194700.088       0.000      142 1
0 0
原创粉丝点击