GCC hash_map:string为键和const char*为键的性能比较

来源:互联网 发布:移动图书馆图书馆软件 编辑:程序博客网 时间:2024/06/03 21:58

http://hi.baidu.com/ah__fu/item/7a498928b3d44380ae48f537



测试的源码如下:
//-----------------------------------------------------------------------------------------
#include <stdio.h>
#include <ext/hash_map>
using namespace __gnu_cxx;
#include <utility>
#include <functional>
using namespace std;
#include <string.h>
#include <assert.h>

// Size in bytes of the UserName buffer in UserInfo and of the line buffer
// that test() reads into.
#define MAX_USERNAME_LEN 40
// printf wrapper: prefixes the message with the file, function and line of
// the call site and appends a newline. `##__VA_ARGS__` is the GNU extension
// that drops the preceding comma when no extra arguments are passed.
#define P(format, ...) printf("%s %s %d " format "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)

// Reads the CPU time-stamp counter with the Intel RDTSC instruction so that
// callers can measure how many ticks a section of code takes.
// Returns the 64-bit counter value.
unsigned long long rdtsc()
{
#ifdef _MSC_VER /* msvc compiler */
    // 0x0F 0x31 is the RDTSC opcode emitted as raw bytes.
    // NOTE(review): this branch has no return statement; it presumably relies
    // on the compiler returning whatever RDTSC left in EDX:EAX - confirm.
    __asm _emit 0x0F
    __asm _emit 0x31
#else /* gcc compiler */
    unsigned int lo_bits, hi_bits;
    // "=a" receives the low 32 bits, "=d" the high 32 bits of the counter.
    __asm__ __volatile__("rdtsc" : "=a" (lo_bits), "=d" (hi_bits));
    return (static_cast<unsigned long long>(hi_bits) << 32) | lo_bits;
#endif
}

// One user record read from the input file by test().
struct UserInfo
{
    // User name text; the benchmarks use it as the hash key.
    char UserName[MAX_USERNAME_LEN];
    // strlen() of UserName, stored when the record is read.
    short Length;
};

// Number of UserInfo elements that test() allocates for pUsers.
#define MAX_USERS 600000
// Array of user records; allocated and released inside test().
UserInfo* pUsers = NULL;
// Number of records stored in pUsers so far.
int UserCount = 0;

// Benchmark with std::string keys (defined further down in this file).
void make_hash(UserInfo* Users, int UserCount);
// Benchmark with const char* keys (defined further down in this file).
void make_char_hash(UserInfo* Users, int UserCount);

void test()
{
    //读入用户
    FILE* fp = fopen("users.txt", "r");
    if (NULL==fp)
    {
        P("open file error");
        return;
    }
    pUsers = new UserInfo[MAX_USERS];
    char temp_username[MAX_USERNAME_LEN];
    while (NULL!=fgets(temp_username, sizeof(temp_username), fp))
    {
        strncpy(pUsers[UserCount].UserName, temp_username, sizeof(pUsers[0].UserName)-1);
        pUsers[UserCount].Length = strlen(pUsers[UserCount].UserName);
        UserCount++;
    }
    fclose(fp);
    fp = NULL;
    P("user count=%d", UserCount);
    //
    make_hash(pUsers, UserCount);
    make_char_hash(pUsers, UserCount);
    delete[] pUsers;
    pUsers = NULL;
}

int main()
{
    test();
    return 1;
}

//=============================================================================

#include <string>
using namespace std;

// Hash functor for std::string keys: feeds the string's NUL-terminated
// character data to __stl_hash_string and returns that hash value.
struct str_hash
{
    size_t operator()(const std::string& key) const
    {
        const char* const text = key.c_str();
        return __gnu_cxx::__stl_hash_string(text);
    }
};

// hash_map keyed by std::string (hashed with str_hash) whose mapped value is
// a pointer to a UserInfo record.
typedef hash_map<string, UserInfo*, str_hash> StringHash;
// Iterator over a StringHash.
typedef StringHash::iterator StringHashIterator;

void test_string_find(StringHash& hash, UserInfo* Users, int UserCount)
{
    int i;
    StringHashIterator it;
    unsigned long long start, end;
    start = rdtsc();
    for (i=0; i<UserCount; i++)
    {
        it = hash.find(Users[i].UserName);
        if (it == hash.end())
        {
            P("not found %s", Users[i].UserName);
        }
    }
    end = rdtsc();
    end -= start;
    P("string find \t= %llu", end);
}

// Times the removal of every user name from a std::string-keyed map.
//   hash      - map to empty
//   Users     - records whose UserName fields are erased
//   UserCount - number of records in Users
// Prints the number of rdtsc ticks the loop took and asserts that the map is
// empty afterwards.
void test_string_erase(StringHash& hash, UserInfo* Users, int UserCount)
{
    const unsigned long long ticks_before = rdtsc();
    int idx = 0;
    while (idx < UserCount)
    {
        const char* const name = Users[idx].UserName;
        hash.erase(name);
        ++idx;
    }
    const unsigned long long elapsed = rdtsc() - ticks_before;
    P("string erase \t= %llu", elapsed);
    assert(hash.size()==0);
}

// Benchmark driver for std::string keys.
//   Users     - records to insert; each map entry points back at its record
//   UserCount - number of records in Users
// Inserts every record into a fresh StringHash, prints the rdtsc ticks spent
// inserting, then runs the find and erase benchmarks on the same map.
void make_hash(UserInfo* Users, int UserCount)
{
    StringHash table;
    const unsigned long long ticks_before = rdtsc();
    for (int idx = 0; idx < UserCount; ++idx)
    {
        UserInfo* const record = Users + idx;
        table.insert(make_pair(record->UserName, record));
    }
    const unsigned long long elapsed = rdtsc() - ticks_before;
    P("string spend\t= %llu", elapsed);

    test_string_find(table, Users, UserCount);
    test_string_erase(table, Users, UserCount);
}
//=============================================================================
// Equality functor for C-string keys: two keys are equal when strcmp() finds
// the same characters, not when the two pointers happen to be identical.
// It is a named functor passed to hash_map explicitly instead of a
// specialization of std::equal_to<const char*>: specializing a standard
// library template for a built-in type is undefined behaviour, and the
// std::binary_function base such a specialization used to derive from was
// removed in C++17.
struct char_equal
{
    bool operator()(const char* str1, const char* str2) const
    {
        return 0==strcmp(str1, str2);
    }
};

// Hash functor for C-string keys: hashes the NUL-terminated characters with
// __stl_hash_string, the same function str_hash applies to std::string keys.
struct char_hash
{
    size_t operator()(const char* str) const
    {
        return __stl_hash_string(str);
    }
};

// hash_map keyed by a const char* that is hashed and compared by content.
// Only the pointer is stored, so the characters it points to have to stay
// alive and unchanged for as long as the entry is in the map.
typedef hash_map<const char*, UserInfo*, char_hash, char_equal> CharHash;
// Iterator over a CharHash.
typedef CharHash::iterator CharHashIterator;

void test_char_find(CharHash& hash, UserInfo* Users, int UserCount)
{
    int i;
    CharHashIterator it;
    unsigned long long start, end;
    start = rdtsc();
    for (i=0; i<UserCount; i++)
    {
        it = hash.find(Users[i].UserName);
        if (it == hash.end())
        {
            P("not found %s", Users[i].UserName);
        }
    }
    end = rdtsc();
    end -= start;
    P("char hash find \t= %llu", end);
}

// Times the removal of every user name from a const char*-keyed map.
//   hash      - map to empty
//   Users     - records whose UserName fields are erased
//   UserCount - number of records in Users
// Prints the number of rdtsc ticks the loop took and asserts that the map is
// empty afterwards.
void test_char_erase(CharHash& hash, UserInfo* Users, int UserCount)
{
    const unsigned long long ticks_before = rdtsc();
    int idx = 0;
    while (idx < UserCount)
    {
        const char* const name = Users[idx].UserName;
        hash.erase(name);
        ++idx;
    }
    const unsigned long long elapsed = rdtsc() - ticks_before;
    P("char erase \t= %llu", elapsed);
    assert(hash.size()==0);
}

// Benchmark driver for const char* keys.
//   Users     - records to insert; each key points into its record's
//               UserName buffer and each value points at the record itself
//   UserCount - number of records in Users
// Inserts every record into a fresh CharHash, prints the rdtsc ticks spent
// inserting, then runs the find and erase benchmarks on the same map.
void make_char_hash(UserInfo* Users, int UserCount)
{
    CharHash table;
    const unsigned long long ticks_before = rdtsc();
    for (int idx = 0; idx < UserCount; ++idx)
    {
        UserInfo* const record = Users + idx;
        table.insert(make_pair(record->UserName, record));
    }
    const unsigned long long elapsed = rdtsc() - ticks_before;
    P("char spend\t= %llu", elapsed);

    test_char_find(table, Users, UserCount);
    test_char_erase(table, Users, UserCount);
}

/*
g++ -o string_hash.o -c string_hash.cpp -g -Wall
g++ -o string_hash.exe string_hash.o
*/
//----------------------------------------------------------------------------------------- 

我在WINDOWS上测试的结果如下:
操作    string      const char* 比例
insert 874604024   260466464   70.21892687
find    2442471712 1716838224 29.70898228
erase   3881273948 2512858480 35.25686376
第二次测试结果:
操作    string      const char* 比例
insert 871314964   252919964   70.9726133
find    2449833580 1655214244 32.43564553
erase   3877847952 2412670168 37.78327057

在LINUX下的测试结果为:
操作    string      const char* 比例
insert 2244910584 2002127911 10.8148037
find    1186194625 870002035   26.65604643
erase   1324754305 965095173   27.1491197
第二次测试结果:
操作    string      const char* 比例
insert 2257350144 2002011515 11.31143211
find    1184883007 868900151   26.66785279
erase   1320117792 961977912   27.12938816

   测试中使用rdtsc指令得到CPU的时钟周期数。表中的“比例”是const char*相对于string节省的耗时百分比,即 (string花费的时间 − const char*花费的时间) ÷ string花费的时间 × 100。从测试结果可以看出,直接使用字符串指针作为键,耗时比使用string至少减少约10%。