中文问题的一些建议
来源:互联网 发布:linux mint 安装 编辑:程序博客网 时间:2024/05/19 19:59
/*
 * Article introduction (translated from the original post):
 *
 * Many developers run into problems with Chinese text during development.
 * Here are some conversion functions I have collected, in the hope that they
 * help. Generally Java encodes and displays text as Unicode, while the
 * encodings commonly used for Chinese are GB2312 and UTF-8. (Not every piece
 * of incoming Chinese text is Unicode, so verify what you actually receive.)
 * When transmitting Chinese text you have to assemble the bytes yourself.
 *
 * To convert GB2312 or Big5 text to Unicode you would use:
 *
 *     unicodeString = new String(myString.getBytes(), "GB2312");
 * or
 *     unicodeString = new String(myString.getBytes(), "Big5");
 *
 * However, depending on the device, an ordinary phone may not support GB2312
 * or Big5; the Motorola phones I know of do not. The functions below may
 * therefore come in handy. (Author's note: these were collected, not written
 * by the author, who believed them correct; this revision repairs several
 * defects in them.)
 */

/**
 * Hand-rolled conversions between Java strings and UTF-8 / 16-bit byte layouts.
 *
 * <p>The implementation deliberately sticks to {@code String}, {@code StringBuffer}
 * and {@code System.arraycopy} and performs surrogate arithmetic by hand, so it does
 * not rely on charset lookups or code-point helper methods. NOTE(review): this is
 * meant to keep the class usable on the restricted phone runtimes the article
 * talks about; confirm against the actual target platform.
 */
class transCN {

    /** Message carried by every malformed-input failure of the UTF-8 decoder. */
    private static final String INVALID_UTF8 = "Invalid UTF-8 format";

    /**
     * Decodes UTF-8 data that is stored one byte per {@code char} in a string.
     *
     * <p>Only the low 8 bits of each input {@code char} are used. Code points
     * above U+FFFF are returned as a UTF-16 surrogate pair.
     *
     * @param instr string whose characters each carry one UTF-8 byte
     * @return the decoded text
     * @throws IOException if a lead byte is not a valid UTF-8 lead byte, a
     *         continuation byte is missing or malformed, or the decoded value
     *         exceeds U+10FFFF
     */
    static public String convertUTF8String2Unicode(String instr) throws IOException {
        int length = instr.length();
        StringBuffer decoded = new StringBuffer(length);
        int i = 0;
        while (i < length) {
            int lead = instr.charAt(i++) & 0xff;
            int codePoint;
            int continuationCount;
            if ((lead & 0x80) == 0) {
                codePoint = lead;
                continuationCount = 0;
            } else if ((lead & 0xE0) == 0xC0) {
                codePoint = lead & 0x1F;
                continuationCount = 1;
            } else if ((lead & 0xF0) == 0xE0) {
                codePoint = lead & 0x0F;
                continuationCount = 2;
            } else if ((lead & 0xF8) == 0xF0) {
                // Only the low three bits are payload; a wider mask would
                // leak the marker bit into the result.
                codePoint = lead & 0x07;
                continuationCount = 3;
            } else {
                // Stray continuation byte or a lead byte outside UTF-8.
                throw new IOException(INVALID_UTF8);
            }

            for (int k = 0; k < continuationCount; k++) {
                if (i >= length) {
                    // Sequence is cut off at the end of the input.
                    throw new IOException(INVALID_UTF8);
                }
                int next = instr.charAt(i++) & 0xff;
                if ((next & 0xC0) != 0x80) {
                    throw new IOException(INVALID_UTF8);
                }
                codePoint = (codePoint << 6) | (next & 0x3F);
            }

            if (codePoint < 0x10000) {
                decoded.append((char) codePoint);
            } else if (codePoint <= 0x10FFFF) {
                // A single char cannot hold this value: emit a surrogate pair.
                int offset = codePoint - 0x10000;
                decoded.append((char) (0xD800 | (offset >> 10)));
                decoded.append((char) (0xDC00 | (offset & 0x3FF)));
            } else {
                throw new IOException(INVALID_UTF8);
            }
        }
        return decoded.toString();
    }

    /**
     * Encodes a string as UTF-8 bytes.
     *
     * <p>A high surrogate immediately followed by a low surrogate is combined
     * into one code point and written as a single 4-byte sequence. An unpaired
     * surrogate is written as a 3-byte sequence of its own value.
     *
     * @param instr text to encode
     * @return a newly allocated array holding exactly the encoded bytes
     */
    public static byte[] convertUnicode2UTF8Byte(String instr) {
        int len = instr.length();
        // Worst case is 3 bytes per char; a surrogate pair is 2 chars -> 4 bytes.
        byte[] buffer = new byte[len * 3];
        int j = 0;
        for (int i = 0; i < len; i++) {
            int codePoint = instr.charAt(i);
            if (codePoint >= 0xD800 && codePoint <= 0xDBFF && i + 1 < len) {
                char low = instr.charAt(i + 1);
                if (low >= 0xDC00 && low <= 0xDFFF) {
                    codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                    i++; // the low surrogate has been consumed
                }
            }

            if (codePoint < 0x80) {
                buffer[j++] = (byte) codePoint;
            } else if (codePoint < 0x0800) {
                buffer[j++] = (byte) (((codePoint >> 6) & 0x1F) | 0xC0);
                buffer[j++] = (byte) ((codePoint & 0x3F) | 0x80);
            } else if (codePoint < 0x010000) {
                buffer[j++] = (byte) (((codePoint >> 12) & 0x0F) | 0xE0);
                buffer[j++] = (byte) (((codePoint >> 6) & 0x3F) | 0x80);
                buffer[j++] = (byte) ((codePoint & 0x3F) | 0x80);
            } else {
                buffer[j++] = (byte) (((codePoint >> 18) & 0x07) | 0xF0);
                buffer[j++] = (byte) (((codePoint >> 12) & 0x3F) | 0x80);
                buffer[j++] = (byte) (((codePoint >> 6) & 0x3F) | 0x80);
                buffer[j++] = (byte) ((codePoint & 0x3F) | 0x80);
            }
        }
        byte[] result = new byte[j];
        System.arraycopy(buffer, 0, result, 0, j);
        return result;
    }

    /**
     * Builds a string from 16-bit units stored high byte first.
     *
     * <p>If {@code myByte} is {@code null} or has an odd length, the method
     * prints {@code "Encoding Error"} to standard output and returns an empty
     * string; no partial result is returned.
     *
     * @param myByte pairs of bytes, high byte then low byte
     * @return one {@code char} per byte pair, or {@code ""} on bad input
     */
    public static String ISO106462Unicode(byte[] myByte) {
        if (myByte == null || (myByte.length & 1) != 0) {
            System.out.println("Encoding Error");
            return "";
        }
        StringBuffer sb = new StringBuffer(myByte.length >> 1);
        for (int i = 0; i < myByte.length; i += 2) {
            int hi = myByte[i] & 0xff;
            int lo = myByte[i + 1] & 0xff;
            sb.append((char) ((hi << 8) | lo));
        }
        return sb.toString();
    }

    /**
     * Splits every {@code char} of a string into two bytes, low byte first.
     *
     * <p>Note that this byte order is the opposite of the one read by
     * {@link #ISO106462Unicode(byte[])}, so the two methods are not inverses
     * of each other.
     *
     * @param s text to convert
     * @return array of length {@code 2 * s.length()}, low byte then high byte
     *         for each character
     */
    public static byte[] Unicode2Byte(String s) {
        int len = s.length();
        byte[] out = new byte[len << 1];
        int j = 0;
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            out[j++] = (byte) (c & 0xff);
            out[j++] = (byte) (c >> 8);
        }
        return out;
    }
}