字符转换工具类

来源:互联网 发布:网络钮祜禄是什么梗 编辑:程序博客网 时间:2024/04/29 14:47

package com.cn.citi.me;

import java.io.UnsupportedEncodingException;

/**
 * Re-labels strings between the client charset (UTF-8) and the database
 * charset (ISO-8859-1) by encoding with one and decoding with the other.
 *
 * <p>Because ISO-8859-1 maps every byte value to exactly one char, the
 * round trip {@code FromDatabase(EntryDatabase(s))} restores {@code s}.
 * Presumably used to push UTF-8 text through a Latin-1 database column;
 * confirm against the callers.
 */
public class CharsetUtil {

    private static final String DATABASE_CHARSET = "ISO-8859-1";
    private static final String CLIENT_CHARSET = "UTF-8";

    /**
     * Encodes {@code text} as UTF-8 and reinterprets those bytes as ISO-8859-1,
     * giving a string with one char per UTF-8 byte.
     *
     * @param text client-side text, may be {@code null}
     * @return the re-labelled string, or {@code null} if {@code text} is {@code null}
     */
    public static String EntryDatabase(String text) {
        return reinterpret(text, CLIENT_CHARSET, DATABASE_CHARSET);
    }

    /**
     * Reverses {@link #EntryDatabase(String)}: encodes {@code text} as
     * ISO-8859-1 and decodes the resulting bytes as UTF-8.
     *
     * @param text database-side text, may be {@code null}
     * @return the decoded string, or {@code null} if {@code text} is {@code null}
     */
    public static String FromDatabase(String text) {
        return reinterpret(text, DATABASE_CHARSET, CLIENT_CHARSET);
    }

    /**
     * Encodes {@code value} with one charset and decodes the bytes with another.
     * If either charset name is not supported the stack trace is printed and the
     * input is returned unchanged.
     */
    private static String reinterpret(String value, String encodeWith, String decodeWith) {
        if (value == null) {
            return null;
        }
        try {
            byte[] raw = value.getBytes(encodeWith);
            return new String(raw, decodeWith);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return value;
        }
    }
}

 

 

 

 

 

 

 

 

 

 

=================================================================================

 

package com.citi.risk.credit.rapid.infra.util;

import java.io.UnsupportedEncodingException;

/**
 * Converts text between Java strings and the database representation.
 *
 * <p>Database text is carried as an ISO-8859-1 string (one char per byte)
 * whose bytes are really either Windows cp1252 or GBK. The helpers here
 * encode client text into that form and decode it again, guessing which
 * of the two encodings the bytes represent.
 */
public class CharsetUtil {

    /** Charset used to carry raw bytes in database strings. */
    public static final String DATABASE_CHARSET = "ISO-8859-1";

    /** Default byte encoding when no GBK byte pair is found. */
    public static final String WINDOWS_CHARSET = "cp1252";

    /** Byte encoding used when a GBK byte pair is found. */
    public static final String GBK_CHARSET = "GBK";

    // Not referenced inside this class; retained because they are public
    // and may be used by callers elsewhere.
    public static final char GBK_CODEPOINT_FROM = 0x8140;
    public static final char GBK_CODEPOINT_TO = 0xFEFF;

    /**
     * Converts a string from one encoding to another by encoding it with
     * {@code fromEnc} and decoding the resulting bytes with {@code toEnc}.
     *
     * @param text    the text to convert, may be {@code null}
     * @param fromEnc charset name used to turn {@code text} into bytes
     * @param toEnc   charset name used to turn those bytes back into a string
     * @return the converted text; {@code null} if {@code text} is {@code null};
     *         the original text if either charset name is not supported
     */
    public static String convertCharset(String text, String fromEnc, String toEnc) {
        if (text == null) {
            return null;
        }
        try {
            byte[] bytes = text.getBytes(fromEnc);
            return new String(bytes, toEnc);
        } catch (UnsupportedEncodingException ignored) {
            // Deliberate best effort: an unknown charset leaves the text untouched.
            return text;
        }
    }

    /**
     * Converts a string by encoding it with {@code fromEnc} and decoding the
     * bytes with whichever charset {@link #detectGBKCharset(byte[])} selects.
     *
     * @param text    the text to convert, may be {@code null}
     * @param fromEnc charset name used to turn {@code text} into bytes
     * @return the converted text; {@code null} if {@code text} is {@code null};
     *         the original text if a charset involved is not supported
     */
    public static String autoConvertCharset(String text, String fromEnc) {
        if (text == null) {
            return null;
        }
        try {
            byte[] bytes = text.getBytes(fromEnc);
            // detectGBKCharset never returns null, so no separate default is needed.
            return new String(bytes, detectGBKCharset(bytes));
        } catch (UnsupportedEncodingException ignored) {
            // Deliberate best effort: an unknown charset leaves the text untouched.
            return text;
        }
    }

    /**
     * Chooses between GBK and Windows cp1252 for an array of bytes.
     *
     * <p>The bytes are classified as GBK as soon as any two adjacent bytes
     * look like a GBK double-byte character: a lead byte in 0x81-0xFE
     * followed by a trail byte in 0x40-0x7E or 0x80-0xFE. The scan looks at
     * every adjacent pair and does not track character boundaries.
     *
     * <p>A general-purpose detector (the original author experimented with
     * {@code UniversalDetector}) is intentionally not used; only GBK is
     * recognised and everything else falls back to cp1252.
     *
     * <p>NOTE(review): because the full lead-byte range is accepted, cp1252
     * bytes in 0x81-0xFE followed by an ordinary letter (for example the
     * 0x93 opening quote before a word) are classified as GBK. Earlier
     * commentary suggested restricting the lead byte to 0xA1-0xF9 to avoid
     * this, but the code never did; existing behaviour is preserved here.
     * Confirm with stored data before narrowing the range.
     *
     * @param bytes the bytes to inspect; {@code null} is treated as empty
     * @return {@link #GBK_CHARSET} if a GBK byte pair is present, otherwise
     *         {@link #WINDOWS_CHARSET}; never {@code null}
     */
    public static String detectGBKCharset(byte[] bytes) {
        if (bytes == null) {
            return WINDOWS_CHARSET;
        }
        for (int i = 1; i < bytes.length; i++) {
            // Mask to unsigned so the ranges read the same as the GBK tables.
            int lead = bytes[i - 1] & 0xFF;
            int trail = bytes[i] & 0xFF;

            boolean leadInRange = lead >= 0x81 && lead <= 0xFE;
            // 0x7F is not a valid GBK trail byte, hence the two sub-ranges.
            boolean trailInRange = (trail >= 0x40 && trail <= 0x7E)
                    || (trail >= 0x80 && trail <= 0xFE);

            if (leadInRange && trailInRange) {
                return GBK_CHARSET;
            }
        }
        return WINDOWS_CHARSET;
    }

    /**
     * Converts client text into the database representation.
     *
     * <p>The text is first encoded as GBK; if the result contains a GBK
     * double-byte pair, GBK is used as the byte encoding, otherwise cp1252.
     * The chosen bytes are then wrapped in an ISO-8859-1 string.
     *
     * @param clientText the client text, may be {@code null}
     * @return the database form of the text, or {@code null} if
     *         {@code clientText} is {@code null}
     */
    public static String databaseEncode(String clientText) {
        if (clientText == null) {
            return null;
        }

        String fromEnc = WINDOWS_CHARSET;
        try {
            fromEnc = detectGBKCharset(clientText.getBytes(GBK_CHARSET));
        } catch (UnsupportedEncodingException e) {
            // GBK is unavailable in this runtime: fall back to cp1252. The
            // trace is kept on stderr because no logger is available here and
            // the fallback is lossy for characters outside cp1252.
            e.printStackTrace();
        }

        return convertCharset(clientText, fromEnc, DATABASE_CHARSET);
    }

    /**
     * Converts database text back into an ordinary Java string for the client.
     *
     * <p>The ISO-8859-1 string is turned back into its raw bytes, which are
     * then decoded as GBK or cp1252 according to
     * {@link #detectGBKCharset(byte[])}.
     *
     * @param dbText the database text, may be {@code null}
     * @return the decoded text, or {@code null} if {@code dbText} is {@code null}
     */
    public static String databaseDecode(String dbText) {
        return autoConvertCharset(dbText, DATABASE_CHARSET);
    }
}

原创粉丝点击