# Build rules for the lq-client page scraper (source.cpp).
# NOTE: every recipe line below must begin with a TAB character.

CXX      = g++
# -pthread configures both the compiler and the linker for POSIX threads.
CXXFLAGS = -g -Wall -std=c++11 -pthread
LDLIBS   = -lcurl -pthread

#OBJS = handledata.o source.o
OBJS = source.o

.PHONY: main clean

# `make main` is kept as the historical entry point; the real target is the
# binary itself, so an up-to-date lq-client is no longer relinked on every run.
main: lq-client

lq-client: $(OBJS)
	$(CXX) -g -o lq-client $(OBJS) $(LDLIBS)

#handledata.o: handledata.c
#	g++ -c -Wall handledata.c -I /usr/local/include/curl -std=c++11

# Compile only: linker libraries are intentionally not passed here.
source.o: source.cpp
	$(CXX) $(CXXFLAGS) -c source.cpp

clean:
	rm -f lq-client $(OBJS)


#include <stdio.h>//#include <unistd.h>//access//#include <fcntl.h>//access#include <iconv.h> //iconv_open#include <stdlib.h>//malloc \relloc#include <string.h>//strlen\strcpy\strcat#include <pthread.h> //pthread#include <vector>#include <map>#include <string>//string#include  <sstream> //stream#include <iostream> //cout#include  <regex> //正则#include <sys/time.h>#include <curl/curl.h>using namespace std;struct MemoryStruct {    char *memory;    size_t size;};static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp){    size_t realsize = size * nmemb;    struct MemoryStruct *mem = (struct MemoryStruct *)userp;    mem->memory = (char *) realloc(mem->memory, mem->size + realsize + 1);    if (mem->memory == NULL)     {        /* out of memory! */        printf("not enough memory (realloc returned NULL)\n");        return 0;    }    memcpy(&(mem->memory[mem->size]), contents, realsize);    mem->size += realsize;    mem->memory[mem->size] = 0;    return realsize;}//这是libcurl接收数据的回调函数,相当于recv的死循环  int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen,          char *outbuf, size_t outlen) {      iconv_t cd;      char **pin = &inbuf;      char **pout = &outbuf;      cd = iconv_open(to_charset, from_charset);      if (cd == 0)          return -1;      memset(outbuf, 0, outlen);      if (iconv(cd, pin, &inlen, pout, &outlen) == -1)          return -1;      iconv_close(cd);  //    *pout = '\0';      return 0;  }  int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {      return code_convert("utf-8", "gb2312", inbuf, inlen, outbuf, outlen);  }  int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {      return code_convert("gb2312", "utf-8", inbuf, inlen, outbuf, outlen);  } void FreeStrBuf(char **ppBuf)//释放指针{    if (NULL != (*ppBuf))    {        free((*ppBuf));        (*ppBuf) = NULL;    }}void FreeStrBuf(int **ppBuf){    if (NULL != (*ppBuf))    {        free((*ppBuf));        (*ppBuf) = NULL; 
   }}pthread_mutex_t sum_mutex;//互斥锁class shares_data{public:    shares_data()    {        name = "";    }    std::string name;};/*class op_class{public:    op_class()    {        cur_num = 0;        shares_map.clear();    }    int cur_num;    std::map<int, class shares_data>  shares_map;};*/inline void NEXT(const string&T, vector<int>&next) {//按模式串生成vector,next(T.size())    next[0] = -1;    for (int i = 1; i<T.size(); i++) {        int j = next[i - 1];        while (T[i - 1] != T[j] && j >= 0) j = next[j];//递推计算        if (T[i - 1] == T[j]) next[i] = j + 1;        else next[i] = 0;    }}inline string::size_type KMP(const string&S, const string&T) {    //利用模式串T的next函数求T在主串S中的个数count的KMP算法    //其中T非空,    vector<int>next(T.size());    NEXT(T, next);    string::size_type index, count = 0;    for (index = 0; index<S.size(); ++index)     {        int pos = 0;        string::size_type iter = index;        while (pos<T.size() && iter<S.size())         {            if (S[iter] == T[pos]) { ++iter; ++pos; }            else {                if (pos == 0) ++iter;                else pos = next[pos - 1] + 1;            }        }        if (pos == T.size() && (iter - index) == T.size()) ++count;        if (count == 1)        {           // std::cout << "index is "<<index<< std::endl;        break;        }        //std::cout << "index is " << index << std::endl;    }    //std::cout << "T size is " << T.size() << std::endl;    //std::cout << "S is " << S << std::endl;    //std::cout << "S size is " << S.size() << std::endl;    return count;}std::map<std::string, std::string>  shares_map;//std::map<std::string, class shares_data>  shares_map;void*  getdata(void * num){    // 对传入的参数进行强制类型转换,由无类型指针变为整形数指针,然后再读取    //这里面对num的修改是对其副本的修改    int num_int = 0;    num_int =   *((int*) &num);    char *prelink = "http://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/";    char *suflink = "/ajax/1/";    char buf[10] = { NULL };    sprintf(buf, "%d", num);    char *charnum = 
buf;    char *templink = (char*) malloc(strlen(charnum) + strlen(suflink) + 1);    strcpy(templink, charnum);    strcat(templink, suflink);    char *totallink = (char*) malloc(strlen(prelink) + strlen(templink) + 1);    strcpy(totallink, prelink);    strcat(totallink, templink);    //printf("%s\r\n", totallink);    //shares_data shares_data_obj;    //std::pair<string, class shares_data> shares_pair(num_int, shares_data_obj);    CURL *curl;    CURLcode res;    struct MemoryStruct chunk;    chunk.memory = (char*) malloc(1);  /* will be grown as needed by the realloc above */    chunk.size = 0;    /* no data at this point */    curl = curl_easy_init();    // const char *filename ="/home/cpp/data.txt";    if (curl)    {        curl_easy_setopt(curl, CURLOPT_URL, totallink);        /* example.com is redirected, so we tell libcurl to follow redirection */        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10);        //网页采用gzip压缩        curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");        //指定回调函数          curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);        //这个变量可作为接收或传递数据的作用          curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk);        /* Perform the request, res will get the return code */        res = curl_easy_perform(curl);        /* Check for errors */        if (res != CURLE_OK)            fprintf(stderr, "curl_easy_perform() failed: %s\n",                curl_easy_strerror(res));        /* always cleanup */        curl_easy_cleanup(curl);    }    //printf("size is %d\n\r\n", chunk.size);    // printf("%s",chunk.memory);     char * databuf = 0;    databuf = (char*) malloc((int) 2 * (chunk.size));    g2u(chunk.memory, chunk.size, databuf, 2 * (chunk.size));    //printf("%d", sizeof(databuf));    //printf("%s", databuf);    //pthread_mutex_lock(&sum_mutex);    std::string line;    //line = databuf;    //std::string data_string(databuf);    std::stringstream 
stream;    stream << databuf;    enum STATEFLAG     {        SHARES_ID,        SHARES_NAME,    };    STATEFLAG linecurflag = SHARES_ID;//当前状态标记    std::pair<std::string, std::string> shares_pair;    std::string shares_code = "";    std::string shares_name = "";        while (std::getline(stream, line))        {            switch (linecurflag)            {            case SHARES_ID:            {                //std::string pattern_1("/\" target = \"_blank\">([0-9])+");                //std::regex rgx_1(pattern_1);                //std::smatch readresults_1;                //if (std::regex_search(line, readresults_1, rgx_1))//regex_search只匹配第一个符合的                //std::cout << line<<std::endl;                if (KMP(line, " target=\"_blank\">"))                {            linecurflag = SHARES_NAME;            shares_code = line.substr(89,6);                   // std::cout<<shares_code<<std::endl;        }                break;            }        case SHARES_NAME:        {            shares_pair.first = shares_code;            shares_pair.second = shares_code;            shares_map.insert(shares_pair);            shares_code = "";            shares_name = "";            linecurflag = SHARES_ID;            break;        }            default:            {                continue;            }            }        }    //shares_map.insert(shares_pair);    //pthread_mutex_unlock(&sum_mutex);    free(databuf);    free(chunk.memory);    FreeStrBuf(&totallink);    FreeStrBuf(&templink);    pthread_exit(0);}int main(void){    curl_global_init(CURL_GLOBAL_DEFAULT);    unsigned long start_time, end_time;    time_t t;    start_time = time(&t);    pthread_t thread_id;    std::vector<pthread_t> thread_id_list;    thread_id_list.clear();    pthread_mutex_init(&sum_mutex,NULL);    shares_map.clear();    int num = 1;    int endpages = 147; //最终页数    for (; num <= endpages; ++num)    {        pthread_create(&thread_id, NULL, getdata, (void*) 
num);//传入到参数必须强转为void*类型,即无类型指针,且不能为引用以防被修改        thread_id_list.push_back(thread_id);    }    for (int i = 0; i < thread_id_list.size(); ++i)    {        pthread_join(thread_id_list[i], NULL);    }    curl_global_cleanup();    pthread_mutex_destroy(&sum_mutex);//注销锁    for (std::map<std::string, std::string>::iterator  it = shares_map.begin(); it != shares_map.end();++it)    {        //printf("The int is:%d\n", it->first);    }    printf("The num of pages is:%d\n", shares_map.size());    end_time = time(&t);    printf("The start time is:%d\n", start_time);    printf("The end time is:%d\n", end_time);    printf( "The total time used is:%d\n" ,(end_time - start_time));  return 0;}
