字符转换工具类
来源:互联网 发布:网络钮祜禄是什么梗 编辑:程序博客网 时间:2024/04/29 14:47
package com.cn.citi.me;
import java.io.UnsupportedEncodingException;
/**
 * Re-labels strings between the client encoding (UTF-8) and the database
 * encoding (ISO-8859-1).
 *
 * <p>Both directions work by encoding the string to bytes with one charset and
 * decoding those same bytes with the other. The two charsets are taken from
 * {@code StandardCharsets}, which every Java runtime provides, so no lookup by
 * name (and no checked exception) is involved.
 *
 * <p>The method names do not follow lowerCamelCase; they are kept as they are
 * so existing callers continue to compile.
 */
public class CharsetUtil {

    /** Charset the database side uses: one char per byte. */
    private static final java.nio.charset.Charset DATABASE_CHARSET =
            java.nio.charset.StandardCharsets.ISO_8859_1;

    /** Charset the client side uses. */
    private static final java.nio.charset.Charset CLIENT_CHARSET =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Prepares client text for storage: encodes it as UTF-8 and returns a
     * string in which every char holds one of those bytes (ISO-8859-1 view).
     *
     * @param text client text, may be {@code null}
     * @return the byte-per-char form of {@code text}, or {@code null} if
     *         {@code text} is {@code null}
     */
    public static String EntryDatabase(String text) {
        if (text == null) {
            return null;
        }
        byte[] utf8Bytes = text.getBytes(CLIENT_CHARSET);
        return new String(utf8Bytes, DATABASE_CHARSET);
    }

    /**
     * Reverses {@link #EntryDatabase(String)}: encodes the stored text as
     * ISO-8859-1 bytes and decodes those bytes as UTF-8.
     *
     * @param text text as read from the database, may be {@code null}
     * @return the decoded client text, or {@code null} if {@code text} is
     *         {@code null}
     */
    public static String FromDatabase(String text) {
        if (text == null) {
            return null;
        }
        byte[] storedBytes = text.getBytes(DATABASE_CHARSET);
        return new String(storedBytes, CLIENT_CHARSET);
    }
}
=================================================================================
package com.citi.risk.credit.rapid.infra.util;
import java.io.UnsupportedEncodingException;
/**
 * Helpers for moving text between Java strings and a database column that is
 * read and written as ISO-8859-1 but whose bytes actually hold either
 * Windows cp1252 text or GBK text.
 *
 * <p>All conversions are best-effort: if a charset name is not supported by
 * the running JVM the input string is returned unchanged instead of throwing.
 */
public class CharsetUtil {

    /** Charset the database driver uses: one char per stored byte. */
    public static final String DATABASE_CHARSET = "ISO-8859-1";

    /** Default encoding assumed for stored bytes when GBK is not detected. */
    public static final String WINDOWS_CHARSET = "cp1252";

    /** Encoding assumed for stored bytes when a GBK byte pair is detected. */
    public static final String GBK_CHARSET = "GBK";

    /** Not referenced inside this class; kept because it is public. */
    public static final char GBK_CODEPOINT_FROM = 0x8140;

    /** Not referenced inside this class; kept because it is public. */
    public static final char GBK_CODEPOINT_TO = 0xFEFF;

    /**
     * Encodes {@code text} to bytes with {@code fromEnc} and decodes those
     * bytes with {@code toEnc}.
     *
     * @param text    string to convert, may be {@code null}
     * @param fromEnc charset name used to turn {@code text} into bytes
     * @param toEnc   charset name used to turn the bytes back into a string
     * @return the converted string; {@code null} if {@code text} is
     *         {@code null}; the original {@code text} if either charset name
     *         is unsupported
     */
    public static String convertCharset(String text, String fromEnc, String toEnc) {
        if (text == null) {
            return null;
        }
        try {
            byte[] raw = text.getBytes(fromEnc);
            return new String(raw, toEnc);
        } catch (UnsupportedEncodingException ignored) {
            // Deliberate best-effort: an unknown charset leaves the text as is.
            return text;
        }
    }

    /**
     * Encodes {@code text} to bytes with {@code fromEnc}, then decodes the
     * bytes with whichever charset {@link #detectGBKCharset(byte[])} picks
     * (GBK or cp1252).
     *
     * @param text    string to convert, may be {@code null}
     * @param fromEnc charset name used to turn {@code text} into bytes
     * @return the converted string; {@code null} if {@code text} is
     *         {@code null}; the original {@code text} if a charset name is
     *         unsupported
     */
    public static String autoConvertCharset(String text, String fromEnc) {
        if (text == null) {
            return null;
        }
        try {
            byte[] raw = text.getBytes(fromEnc);
            // detectGBKCharset never returns null, so no separate default is needed here.
            return new String(raw, detectGBKCharset(raw));
        } catch (UnsupportedEncodingException ignored) {
            // Deliberate best-effort: an unknown charset leaves the text as is.
            return text;
        }
    }

    /**
     * Guesses whether a byte array holds GBK text.
     *
     * <p>The array is scanned for any adjacent pair of bytes that looks like a
     * GBK double-byte code: first byte 0x81-0xFE, second byte 0x40-0x7E or
     * 0x80-0xFE. The first such pair decides for GBK; otherwise the Windows
     * charset is reported. This is a heuristic only. A general-purpose
     * detector (UniversalDetector) was sketched here earlier but is not used.
     *
     * <p>NOTE(review): earlier commentary on this method said only the
     * non-user-defined lead range (0xA1-0xF9) would be checked, to avoid false
     * positives from cp1252 punctuation such as the MS Office quote
     * characters. The condition has always accepted the full 0x81-0xFE lead
     * range, and that behaviour is preserved here - confirm which is intended.
     *
     * @param bytes bytes to inspect; {@code null} is treated like an empty array
     * @return {@link #GBK_CHARSET} if a GBK-looking byte pair is present,
     *         otherwise {@link #WINDOWS_CHARSET}; never {@code null}
     */
    public static String detectGBKCharset(byte[] bytes) {
        if (bytes == null) {
            return WINDOWS_CHARSET;
        }
        for (int i = 1; i < bytes.length; i++) {
            // Mask to unsigned so the ranges read the same as the GBK tables.
            int lead = bytes[i - 1] & 0xFF;
            int trail = bytes[i] & 0xFF;
            boolean leadInRange = lead >= 0x81 && lead <= 0xFE;
            boolean trailInRange = (trail >= 0x40 && trail <= 0x7E)
                    || (trail >= 0x80 && trail <= 0xFE);
            if (leadInRange && trailInRange) {
                return GBK_CHARSET;
            }
        }
        return WINDOWS_CHARSET;
    }

    /**
     * Converts client text into the byte-per-char form written to the
     * database.
     *
     * <p>The text is first encoded as GBK and the result is passed to
     * {@link #detectGBKCharset(byte[])}. The charset it reports (GBK or
     * cp1252) is then used to encode the text, and the resulting bytes are
     * returned as an ISO-8859-1 string.
     *
     * @param clientText text from the client, may be {@code null}
     * @return the database form of {@code clientText}, or {@code null} if
     *         {@code clientText} is {@code null}
     */
    public static String databaseEncode(String clientText) {
        if (clientText == null) {
            return null;
        }
        String storageEnc = WINDOWS_CHARSET;
        try {
            storageEnc = detectGBKCharset(clientText.getBytes(GBK_CHARSET));
        } catch (UnsupportedEncodingException ignored) {
            // GBK is unavailable on this JVM: keep the cp1252 default,
            // matching the best-effort policy of the other methods.
        }
        return convertCharset(clientText, storageEnc, DATABASE_CHARSET);
    }

    /**
     * Converts text read from the database into a normal Java string.
     *
     * <p>The stored chars are turned back into bytes via ISO-8859-1 and then
     * decoded as GBK or cp1252, as chosen by
     * {@link #detectGBKCharset(byte[])}.
     *
     * @param dbText text as read from the database, may be {@code null}
     * @return the decoded text, or {@code null} if {@code dbText} is
     *         {@code null}
     */
    public static String databaseDecode(String dbText) {
        return autoConvertCharset(dbText, DATABASE_CHARSET);
    }
}
- 字符转换工具类
- 字符转换工具类
- 字符类型转换工具类
- C++ 字符编码转换工具类
- 字符编码转换工具
- Unicode字符转换工具
- 进行字符操作的工具类!转换字符
- java 驼峰字符和下划线字符相互转换工具类
- 字符转换Unicode编码工具
- Java工具类——全角半角字符相互转换
- Java工具类——全角半角字符相互转换
- php 的字符编码转换工具
- Linux iconv --字符编码转换工具
- Linux 命令iconv -字符编码转换工具
- linux中字符编码转换工具iconv
- 字符工具类
- 字符编码工具类
- 字符编码工具类
- 简单的说说对GDI+坐标系的理解
- zoj-1049
- 理解MySQL——复制(Replication)
- ffmpeg+crtmpserver
- C# word中添加横线
- 字符转换工具类
- jxl架包操作excel表格
- WinDBG教程
- Repeater控件的点击事件
- onItemClick以及onItemLongClick等易模糊问题验证
- hbase Shell之简单命令说明
- Android中文件类File的详细使用说明
- 487-3279
- REST WebService与SOAP WebService的比较