Judy数组排重使用实例

来源:互联网 发布:mac 2017 编辑:程序博客网 时间:2024/06/02 04:25

背景:客户反馈每天统计的网民IP使用量不对劲,每天5时最少有180多万个IP访问,每天21时许达到最多230多万个IP访问,客户质疑高峰与低谷之间只差了50多万,对数据产生了怀疑。

解决办法:取某一天5时段和21时段的DNS访问日志,按源IP(网民IP)进行排重,统计各时段不重复的IP数量。


#include <stdio.h>#include "Judy.h"#include <string>#include <iostream>#include <zlib.h>#define MAXLINE 100000000                 // max string (line) length//#define FILENUM 96#define FILENUM 27using namespace std;uint8_t   Index[MAXLINE];               // string to insertint     // Usage:  JudySort < file_to_sortmain(){    Pvoid_t   PJArray = (PWord_t)NULL;  // Judy array.    PWord_t   PValue;                   // Judy array element.    Word_t    Bytes;                    // size of JudySL array.    long CipNum = 0;    const char *Files[FILENUM] = {        ///*        "/trans/ResultFile/DnsMergeResult/DX001/20161204/finish/DX001_20161204055407_00.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_00.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_01.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_02.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_03.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_04.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_05.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_06.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_07.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_08.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_09.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_10.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_11.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_12.mer.gz",        
"/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_13.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_14.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_15.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204055423_16.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_00.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_01.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_02.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_03.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_04.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_05.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_06.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204055419_07.mer.gz",        "/trans/ResultFile/DnsMergeResult/YD001/20161204/finish/YD001_20161204055404_00.mer.gz"        /*        "/trans/ResultFile/DnsMergeResult/DX001/20161204/finish/DX001_20161204215409_00.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_00.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_01.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_02.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_03.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_04.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_05.mer.gz",        
"/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_06.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_07.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_08.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_09.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_10.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_11.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_12.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_13.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_14.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_15.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_16.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_17.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_18.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_19.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_20.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_21.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_22.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_23.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_24.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_25.mer.gz",        
"/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_26.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_27.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_28.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_29.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_30.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_31.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_32.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_33.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_34.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_35.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_36.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_37.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_38.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_39.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_40.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_41.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_42.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_43.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_44.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_45.mer.gz",        
"/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_46.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_47.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_48.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_49.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_50.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_51.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_52.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_53.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_54.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_55.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_56.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_57.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_58.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_59.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_60.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_61.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_62.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_63.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_64.mer.gz",        "/trans/ResultFile/DnsMergeResult/DX002/20161204/finish/DX002_20161204215506_65.mer.gz",        
"/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_00.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_01.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_02.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_03.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_04.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_05.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_06.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_07.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_08.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_09.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_10.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_11.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_12.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_13.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_14.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_15.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_16.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_17.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_18.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_19.mer.gz",        
"/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_20.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_21.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_22.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_23.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_24.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_25.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_26.mer.gz",        "/trans/ResultFile/DnsMergeResult/TT001/20161204/finish/TT001_20161204215439_27.mer.gz",        "/trans/ResultFile/DnsMergeResult/YD001/20161204/finish/YD001_20161204215404_00.mer.gz"        */        };    /*const char *Files[FILENUM] = {       "DX002_20161204055423_00.mer.gz","DX002_20161204055423_01.mer.gz"};*/    int FileNum = 0;    uint8_t Cip[9] = {0};    //FILE *Fp = NULL;    gzFile Fp ;    Fp = gzopen(Files[FileNum], "r");    if(NULL == Fp)    {        cerr << "cannot open file" << Files[FileNum] << endl;        return 0;    }    while( FileNum != FILENUM)    {        while (gzgets(Fp, (char *)Index, MAXLINE) != (char *)NULL)        {            sscanf((char *)Index, "%*s\t%*s\t%x\t%*x\t%*s\n", (char *)Cip);            JSLI(PValue, PJArray, Cip);   // store string into array            if (PValue == PJERR)            // if out of memory?            
{                               // so do something                printf("Malloc failed -- get more ram\n");                exit(1);            }            ++(*PValue);                    // count instances of string        }        gzclose(Fp);        FileNum++;        if(FileNum != FILENUM)        {            Fp = gzopen(Files[FileNum], "r");            if(NULL == Fp)            {                cerr << "cannot open file" << Files[FileNum] << endl;                return 0;            }        }            }    //gzclose(Fp);        Index[0] = '\0';                    // start with smallest string.    JSLF(PValue, PJArray, Index);       // get first string    while (PValue != NULL)    {        while ((*PValue)--)             // print duplicates        {                    }//printf("%s", Index);        ++CipNum;        JSLN(PValue, PJArray, Index);   // get next string    }    JSLFA(Bytes, PJArray);              // free array    fprintf(stderr, "The JudySL array used %lu bytes of memory\n", Bytes);    cout << "++++++++++++++++++FINANLLY+++++++cip  = " <<  CipNum << endl;    return (0);}





1 0
原创粉丝点击