原子化字符串

来源:互联网 发布:linux运维之道 编辑:程序博客网 时间:2024/05/20 01:39
在编译器或者其它语言处理软件的开发过程中,字符串管理是非常重要的。
原子化字符串(string interning)可以提升符号管理和文件名管理的效率:内容相同的字符串只保存一份,之后比较两个字符串是否相等只需比较指针。

下面是区区编写的一个基于哈希表的简单实现,intern 一词来源于 Emacs Lisp 的 intern 函数:
///////////////////////Pimpl模式声明
class AtomsImpl;

// Public handle for a table of interned ("atomized") strings.
//
// Every call is forwarded to an AtomsImpl object that the constructor
// allocates with new and the destructor deletes (pimpl idiom), so the
// implementation details stay out of this declaration.
class Atoms{
public:
    // Interns s; returns whatever AtomsImpl::intern returns for it.
    const char * intern(const std::string &s);
    // Same as above for a NUL-terminated C string.
    const char * intern(const char *s);
    Atoms();
    ~Atoms();
private:
    // Not copyable: the destructor deletes pimpl, so a compiler-generated
    // copy would make two objects delete the same AtomsImpl. The copy
    // operations are declared private and intentionally left undefined.
    Atoms(const Atoms &);
    Atoms &operator=(const Atoms &);

    AtomsImpl *pimpl;   // owned implementation object
};



///////////////////////Atoms string container实现

// One entry of a bucket chain in the atom table: the stored string plus the
// values cached alongside it when it was inserted.
struct AtomsImplNode
{
    size_t         len;       // string length as reported by string::size() at insertion
    size_t         hash_val;  // full (unreduced) hash, reused when the table is rehashed
    char          *s;         // buffer allocated with new[]; released with delete[]
    AtomsImplNode *next;      // following node in the same bucket, null at the end
};

// Chained hash table that stores exactly one copy of every distinct string
// handed to intern(). This is the object Atoms allocates and forwards to.
class AtomsImpl{
public:
    AtomsImpl();
    ~AtomsImpl();
    // Returns the stored copy of s, inserting it first if it is not present.
    const char * intern(const string &s);
    // Overload for NUL-terminated C strings.
    const char * intern(const char *s);
private:
    // Not copyable: the destructor deletes every node reachable from
    // buckets, so a member-wise copy would free the same nodes twice.
    // Declared private and intentionally left undefined.
    AtomsImpl(const AtomsImpl &);
    AtomsImpl &operator=(const AtomsImpl &);

    vector<AtomsImplNode*> buckets;   // each element heads a singly linked chain of nodes
    size_t atom_count;                // nodes inserted so far; compared with buckets.size() to trigger growth
};

// Builds an empty table: a single bucket holding a null chain head, and an
// atom counter of zero.
AtomsImpl::AtomsImpl()
    : buckets(1),
      atom_count(0)
{
}

// Frees every node of every bucket chain together with its string buffer.
AtomsImpl::~AtomsImpl()
{
    for (size_t slot = 0; slot < buckets.size(); ++slot) {
        AtomsImplNode *node = buckets[slot];
        while (node) {
            // Read the link before the node is destroyed.
            AtomsImplNode *following = node->next;
            delete [] node->s;
            delete node;
            node = following;
        }
    }
}

const char *AtomsImpl::intern(const string &s){
const char *ret = NULL;
if (atom_count > buckets.size()) { //rehash
size_t new_buckets_size = buckets.size() * 2;
vector<AtomsImplNode*> new_buckets(new_buckets_size);
vector<AtomsImplNode*>::iterator beg = buckets.begin(), end = buckets.end();
for(; beg!=end; ++beg) {
AtomsImplNode *head = *beg, *last;
while(head) {
last = head;
head = head->next;
size_t idx = last->hash_val % new_buckets_size;
if (new_buckets[idx]) {
AtomsImplNode *tail = new_buckets[idx];
while(tail->next)
tail = tail->next;
tail->next = last;
last->next = NULL;
}else{
new_buckets[idx] = last;
last->next = NULL;
}
}
}
buckets.swap(new_buckets);
}
size_t hash_val = 7, len = s.size(), hidx = 0;
while(hidx<len){
hash_val = hash_val * 31 + s[hidx++];
}
size_t idx = hash_val % buckets.size();
AtomsImplNode *tail = NULL; //tricky to reduce insert code
if (buckets[idx]){
AtomsImplNode *head = buckets[idx];
while(head){
if (head->len == len &&
head->hash_val == hash_val &&
!strcmp(s.c_str(), head->s))
return head->s;
tail = head;
head = head->next;
}
}
AtomsImplNode *newNode = new AtomsImplNode;
newNode->len = len;
newNode->hash_val = hash_val;
newNode->next = NULL;
newNode->s = new char[len+1];
strcpy(newNode->s, s.c_str());
if (tail){
tail->next = newNode;
} else {
buckets[idx] = newNode;
}
++atom_count;
return newNode->s;

}
const char *AtomsImpl::intern(const char *s){
return intern(string(s));
}

// Allocates the implementation object that every other member forwards to.
Atoms::Atoms()
    : pimpl(new AtomsImpl)
{
}
// Destroys the implementation object allocated by the constructor.
Atoms::~Atoms()
{
    delete pimpl;
}
const char * Atoms::intern(const string &s){
return pimpl->intern(s);
}
const char * Atoms::intern(const char *s){
return pimpl->intern(s);
}

原创粉丝点击