在PB(PowerBuilder)中实现字符串在GB2312与Unicode、UTF-8之间的相互转换

来源:互联网 发布:主要的网络侦察技术 编辑:程序博客网 时间:2024/05/06 07:00

以下对象用于在PB(PowerBuilder)中实现字符串在GB2312与Unicode、UTF-8之间的相互转换,对象的代码将持续完善。

 

$PBExportHeader$n_cst_unicode_cvt.sru
forward
global type n_cst_unicode_cvt from nonvisualobject
end type
end forward

global type n_cst_unicode_cvt from nonvisualobject autoinstantiate
end type

type prototypes
// Win32 conversion helpers. The declarations are kept exactly as published.
// NOTE(review): the character counts and return values are declared as PowerScript
// "int"; if that is 16-bit in the target PowerBuilder version, strings longer than
// 32767 units cannot be converted -- confirm before widening to long.
Function int WideCharToMultiByte(uint CodePage, ulong dwFlags, ref char lpWideCharStr[], int cchWideChar, ref string lpMultiByteStr, int cbMultiByte,ref string lpDefaultChar,ref boolean lpUsedDefaultChar) Library "KERNEL32.DLL"
FUNCTION int MultiByteToWideChar(uint CodePage, ulong dwFlags, ref string lpMultiByteStr,int cchMultiByte,ref char lpWideCharStr[],  int cchWideChar) LIBRARY "kernel32.dll" ALIAS FOR "MultiByteToWideChar"
FUNCTION ulong LCMapString(ulong Locale,ulong dwMapFlags,ref string lpSrcStr,ulong cchSrc,ref string lpDestStr,ulong cchDest) LIBRARY "kernel32.dll" ALIAS FOR "LCMapStringA"
end prototypes

type variables
constant long CP_ACP = 0        // default to ANSI code page
constant long CP_UTF8 = 65001   // default to UTF-8 code page
constant long MB_PRECOMPOSED = 1
end variables

forward prototypes
public function string of_jt_2_ft (string src)
public function string of_gb_to_unicode (string as_source)
public function string of_unicode_to_gb (string as_source)
public function string of_unicode_to_utf8 (string as)
private function string of_dec2hex (long al)
private function long of_hex2dec (string as)
public function string of_uft8_to_unicode (string as)
public function string of_hex2bin (string s_hex)
public function string of_bin2hex (string s_binary)
public function string of_gb_to_utf8 (string as)
end prototypes

public function string of_jt_2_ft (string src);// Simplified Chinese -> Traditional Chinese character mapping.
//
// Argument : src - text to map (ANSI string, expected to be code page 936 text).
// Returns  : the mapped text; src itself when it is null/empty or when the
//            LCMapString call reports failure (returns 0).
//
// Only the LCMapString step is performed, so the result stays in the same code
// page as the input. The disabled draft of this function additionally tried a
// 936 -> 950 (Big5) transcode; that part is not implemented here.
// NOTE(review): relies on len()/left() counting bytes for ANSI strings, as the
// "A" flavour of the API expects -- confirm for the PowerBuilder version in use.
constant ulong LCID_ZH_CN = 2052
constant ulong LCMAP_TRADITIONAL_CHINESE = 67108864   // 0x04000000
string ls_src, ls_target
long ll_src_len, ll_buf_len
ulong lul_written

if isnull(src) then return src

ll_src_len = len(src)
if ll_src_len = 0 then return src

ls_src = src
// Same generous buffer size the disabled draft used.
ll_buf_len = ll_src_len * 2 + 1
ls_target = space(ll_buf_len)

lul_written = LCMapString(LCID_ZH_CN, LCMAP_TRADITIONAL_CHINESE, ls_src, ll_src_len, ls_target, ll_buf_len)
if lul_written = 0 then return src

// The source length was passed explicitly, so no terminator is written:
// keep exactly the part of the space-filled buffer that was produced.
return left(ls_target, lul_written)
end function

public function string of_gb_to_unicode (string as_source);// GB2312 (code page 936) text -> hexadecimal UTF-16 text.
//
// Argument : as_source - ANSI string in code page 936.
// Returns  : four upper-case hex digits per UTF-16 code unit, high byte first
//            (for example "4E2D"); '' for null/empty input or when the
//            conversion fails.
constant uint CP_GB2312 = 936
char lc_data[]
string ls_ret
long ll_capacity, ll_written, ll_units, ll_unit

if isnull(as_source) then return ''
if as_source = '' then return ''

// One wide character per source character is the upper bound; add one for
// the terminator that is produced because the source length is passed as -1.
ll_capacity = lenw(as_source) + 1
// The buffer is an array of single-byte chars, so reserve two per wide character.
lc_data = space(ll_capacity * 2)

ll_written = MultiByteToWideChar(CP_GB2312, 0, as_source, -1, lc_data, ll_capacity)
if ll_written <= 0 then return ''

// Drop the terminator from the reported count.
ll_units = ll_written - 1

// The buffer holds little-endian code units: emit the high byte first.
for ll_unit = 1 to ll_units
	ls_ret += of_dec2hex(asc(lc_data[ll_unit * 2])) + of_dec2hex(asc(lc_data[ll_unit * 2 - 1]))
next

return ls_ret
end function

public function string of_unicode_to_gb (string as_source);// Hexadecimal UTF-16 text -> GB2312 (code page 936) text.
//
// Argument : as_source - four hex digits per UTF-16 code unit, high byte first
//            (the format produced by of_gb_to_unicode).
// Returns  : the converted ANSI string; '' for null/empty input, for input that
//            is not a whole number of 4-digit groups, or when the conversion fails.
constant uint CP_GB2312 = 936
char lc_data[]
string ls_ret, ls_null, ls_unit
long ll_units, ll_unit, ll_len
boolean lb_used_default = false

if isnull(as_source) then return ''
if as_source = '' then return ''
if mod(len(as_source), 4) <> 0 then return ''

ll_units = len(as_source) / 4

// Rebuild the little-endian byte buffer: low byte first, then high byte.
for ll_unit = 1 to ll_units
	ls_unit = mid(as_source, (ll_unit - 1) * 4 + 1, 4)
	lc_data[ll_unit * 2 - 1] = char(of_hex2dec(right(ls_unit, 2)))
	lc_data[ll_unit * 2] = char(of_hex2dec(left(ls_unit, 2)))
next

// Two zero bytes form the wide-character terminator required by the -1 length.
lc_data[ll_units * 2 + 1] = char(0)
lc_data[ll_units * 2 + 2] = char(0)

setnull(ls_null)

// First call: ask for the required size of the output (terminator included).
ll_len = WideCharToMultiByte(CP_GB2312, 0, lc_data, -1, ls_null, 0, ls_null, lb_used_default)
if ll_len <= 0 then return ''

// Second call: convert into a buffer of that size.
ls_ret = space(ll_len)
WideCharToMultiByte(CP_GB2312, 0, lc_data, -1, ls_ret, ll_len, ls_null, lb_used_default)

return ls_ret
end function

public function string of_unicode_to_utf8 (string as);// Hexadecimal UTF-16 text -> percent-encoded UTF-8 text.
//
// Argument : as - four hex digits per code unit (case-insensitive), e.g. "4E2D".
// Returns  : every UTF-8 byte written as "%XX", e.g. "%E4%B8%AD".
//            A trailing group shorter than four digits is ignored.
//
//   Code unit (hex)     UTF-8 bytes (binary)
//   0000 - 007F         0xxxxxxx
//   0080 - 07FF         110xxxxx 10xxxxxx
//   0800 - FFFF         1110xxxx 10xxxxxx 10xxxxxx
//
// Each code unit is encoded on its own; surrogate pairs are not combined.
string ls_out, ls_unit, ls_bits
long ll_len, ll_i, ll_j

// The range tests below are string comparisons, so normalise the case first.
as = upper(as)
ll_len = len(as)

for ll_i = 1 to ll_len step 4
	ls_unit = mid(as, ll_i, 4)
	if len(ls_unit) < 4 then exit

	choose case ls_unit
		case '0000' to '007F'
			ls_bits = '0' + right('000000' + of_hex2bin(ls_unit), 7)
		case '0080' to '07FF'
			ls_bits = right('0000000000' + of_hex2bin(ls_unit), 11)
			ls_bits = '110' + left(ls_bits, 5) + '10' + right(ls_bits, 6)
		case else // '0800' to 'FFFF'
			ls_bits = right('0000000000000000' + of_hex2bin(ls_unit), 16)
			ls_bits = '1110' + left(ls_bits, 4) + '10' + mid(ls_bits, 5, 6) + '10' + mid(ls_bits, 11, 6)
	end choose

	// Emit one "%XX" per group of eight bits.
	for ll_j = 1 to len(ls_bits) step 8
		ls_out += '%' + of_bin2hex(mid(ls_bits, ll_j, 8))
	next
next

return ls_out
end function

private function string of_dec2hex (long al);// Non-negative number -> upper-case hexadecimal text.
//
// The result is zero-padded to an even number of digits so that a byte value
// always yields exactly two digits (10 -> "0A", 0 -> "00"); callers rely on
// that to build fixed-width groups. Negative input yields ''.
string ls_hex
long ll_rest, ll_digit

if al = 0 then return '00'

ll_rest = al
do while ll_rest > 0
	ll_digit = mod(ll_rest, 16)
	ll_rest = truncate(ll_rest / 16, 0)
	ls_hex = mid('0123456789ABCDEF', ll_digit + 1, 1) + ls_hex
loop

if mod(len(ls_hex), 2) = 1 then ls_hex = '0' + ls_hex

return ls_hex
end function

private function long of_hex2dec (string as);// Hexadecimal text (case-insensitive) -> number.
//
// Characters that are not hex digits contribute the value 0.
long ll_value, ll_i, ll_digit

as = upper(as)

for ll_i = 1 to len(as)
	ll_digit = pos('0123456789ABCDEF', mid(as, ll_i, 1)) - 1
	if ll_digit < 0 then ll_digit = 0
	ll_value = ll_value * 16 + ll_digit
next

return ll_value
end function

public function string of_uft8_to_unicode (string as);// Percent-encoded UTF-8 text -> hexadecimal UTF-16 text.
// (The misspelt function name is kept so existing callers keep working.)
//
// Argument : as - UTF-8 bytes, each written as "%XX" (case-insensitive), i.e. the
//            format produced by of_unicode_to_utf8.
// Returns  : four upper-case hex digits per decoded value, e.g. "4E2D";
//            '' when the input is empty or is not well-formed "%XX" UTF-8 of
//            one to three bytes per character.
//
//   Code unit (hex)     UTF-8 bytes (binary)
//   0000 - 007F         0xxxxxxx
//   0080 - 07FF         110xxxxx 10xxxxxx
//   0800 - FFFF         1110xxxx 10xxxxxx 10xxxxxx
string ls_out, ls_lead, ls_cont, ls_bits
long ll_len, ll_pos, ll_extra, ll_k

if isnull(as) then return ''

as = upper(as)
ll_len = len(as)
ll_pos = 1

do while ll_pos <= ll_len
	// Lead byte.
	if mid(as, ll_pos, 1) <> '%' then return ''
	ls_lead = of_hex2bin(mid(as, ll_pos + 1, 2))
	if len(ls_lead) <> 8 then return ''
	ll_pos += 3

	// The lead byte's prefix tells how many continuation bytes follow.
	if left(ls_lead, 1) = '0' then
		ls_bits = right(ls_lead, 7)
		ll_extra = 0
	elseif left(ls_lead, 3) = '110' then
		ls_bits = right(ls_lead, 5)
		ll_extra = 1
	elseif left(ls_lead, 4) = '1110' then
		ls_bits = right(ls_lead, 4)
		ll_extra = 2
	else
		return ''
	end if

	// Each continuation byte must look like 10xxxxxx and adds six payload bits.
	for ll_k = 1 to ll_extra
		if mid(as, ll_pos, 1) <> '%' then return ''
		ls_cont = of_hex2bin(mid(as, ll_pos + 1, 2))
		if len(ls_cont) <> 8 then return ''
		if left(ls_cont, 2) <> '10' then return ''
		ls_bits += right(ls_cont, 6)
		ll_pos += 3
	next

	// Left-pad the payload to 16 bits and emit it as four hex digits.
	ls_out += of_bin2hex(right('0000000000000000' + ls_bits, 16))
loop

return ls_out
end function

public function string of_hex2bin (string s_hex);// Hexadecimal text -> binary text (four '0'/'1' characters per hex digit).
//
// Hex digits are accepted in either case; any other character is skipped.
constant string HEX_DIGITS = "0123456789ABCDEF"
// The sixteen 4-bit patterns 0000 .. 1111, concatenated in order.
constant string NIBBLE_BITS = "0000000100100011010001010110011110001001101010111100110111101111"
string s_return
long ll_i, ll_digit

s_hex = upper(s_hex)

for ll_i = 1 to len(s_hex)
	ll_digit = pos(HEX_DIGITS, mid(s_hex, ll_i, 1))
	if ll_digit > 0 then
		s_return += mid(NIBBLE_BITS, (ll_digit - 1) * 4 + 1, 4)
	end if
next

return s_return
end function

public function string of_bin2hex (string s_binary);// Binary text -> upper-case hexadecimal text (one digit per four characters).
//
// The input is read in groups of four from the left; a group that is not
// exactly four '0'/'1' characters is skipped.
string s_return, s_nibble
long ll_i

for ll_i = 1 to len(s_binary) step 4
	s_nibble = mid(s_binary, ll_i, 4)
	choose case s_nibble
		case "0000"
			s_return += "0"
		case "0001"
			s_return += "1"
		case "0010"
			s_return += "2"
		case "0011"
			s_return += "3"
		case "0100"
			s_return += "4"
		case "0101"
			s_return += "5"
		case "0110"
			s_return += "6"
		case "0111"
			s_return += "7"
		case "1000"
			s_return += "8"
		case "1001"
			s_return += "9"
		case "1010"
			s_return += "A"
		case "1011"
			s_return += "B"
		case "1100"
			s_return += "C"
		case "1101"
			s_return += "D"
		case "1110"
			s_return += "E"
		case "1111"
			s_return += "F"
		case else
			// Not a complete binary group: ignored.
	end choose
next

return s_return
end function

public function string of_gb_to_utf8 (string as);// GB2312 text -> hexadecimal UTF-16 -> percent-encoded UTF-8.
return of_unicode_to_utf8(of_gb_to_unicode(as))
end function

on n_cst_unicode_cvt.create
call super::create
TriggerEvent( this, "constructor" )
end on

on n_cst_unicode_cvt.destroy
TriggerEvent( this, "destructor" )
call super::destroy
end on