Unicode(UTF16)文件读写终极方案

来源:互联网 发布:双汇烤肠 知乎 编辑:程序博客网 时间:2024/05/24 03:20

去年写过一篇博客,介绍了自己写的一个读写Unicode编码文件的方法。这个方法不使用系统提供的类库,只使用标准C++,因此有很好的可移植性。

上次写的没经过大规模的测试,这几天对这个方法进行了修改:

1支持Windows和Linux的换行符

2修改了部分代码,使之更加健壮

代码如下:

namespace {

/* Decodes little-endian UTF-16 bytes into a wide string.
 * Each 16-bit code unit becomes exactly one wchar_t; surrogate pairs are
 * NOT combined, so on platforms with a 32-bit wchar_t a non-BMP character
 * is stored as two wchar_t values. A trailing unpaired byte is ignored. */
std::wstring DecodeUtf16LeUnits(const std::string& bytes)
{
    std::wstring text;
    text.reserve(bytes.size() / 2);
    for (std::string::size_type i = 0; i + 1 < bytes.size(); i += 2)
    {
        const unsigned char low  = static_cast<unsigned char>(bytes[i]);
        const unsigned char high = static_cast<unsigned char>(bytes[i + 1]);
        text += static_cast<wchar_t>((high << 8) | low);
    }
    return text;
}

} // namespace

/*
 *************************************************
 Purpose : Read a UTF-16 (little-endian, with BOM) file line by line and
           store every line, without its line terminator, in vec_lines.
 Params  : path      - path of the file to read
           vec_lines - output; cleared first, then receives one wstring
                       per line (blank lines are stored as L"")
 Returns : true on success; false if the file cannot be opened or does not
           start with the FF FE byte-order mark (a message is written to
           std::cerr in both cases).
 -------------------------------------------------
 Notes   : Handles Windows (CR LF) and Linux (LF) line endings. The ending
           is detected per line, so files with mixed endings are read
           correctly as well.
           A final line without a terminator is kept as-is when non-empty.
           A dangling single byte at the end of the file is ignored.
 -------------------------------------------------
 Author  : Li Yachao
 Date    : 2013-2-26
 **************************************************/
bool FileIO::UTF16FileRead(const std::string& path, std::vector<std::wstring> &vec_lines)
{
    vec_lines.clear();

    /* Binary mode: the bytes must reach us untouched, without any
       newline translation by the stream. */
    std::ifstream fin(path.c_str(), std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        std::cerr << "Open " << path << " error!" << std::endl;
        return false;
    }

    /* The file must start with the UTF-16 little-endian BOM (FF FE). */
    char bom[2] = {'\0', '\0'};
    if (!fin.read(bom, 2) || bom[0] != '\xff' || bom[1] != '\xfe')
    {
        std::cerr << "File " << path << " type error!" << std::endl;
        return false;
    }

    std::string line;   /* raw bytes of the current line, always an even count */
    char unit[2];       /* one UTF-16 code unit, low byte first */
    while (fin.read(unit, 2))
    {
        const bool is_line_feed = (unit[0] == '\x0a') && (unit[1] == '\x00');
        if (!is_line_feed)
        {
            line.append(unit, 2);
            continue;
        }

        /* Drop a carriage return that directly precedes the line feed.
           Deciding this for every line (instead of once per file) avoids
           cutting a real character off a bare-LF line in a CRLF file and
           avoids leaving a stray CR in an LF file. */
        const std::string::size_type length = line.size();
        if (length >= 2 && line[length - 2] == '\x0d' && line[length - 1] == '\x00')
        {
            line.erase(length - 2);
        }

        vec_lines.push_back(DecodeUtf16LeUnits(line));
        line.clear();
    }

    /* Last line of a file that does not end with a line terminator. */
    if (!line.empty())
    {
        vec_lines.push_back(DecodeUtf16LeUnits(line));
    }

    fin.close();
    return true;
}


原创粉丝点击