字符转换工具类

来源：互联网发布：网络钮祜禄是什么梗编辑：程序博客网时间：2024/04/29 14:47

package com.cn.citi.me;

import java.io.UnsupportedEncodingException;

/**
 * Converts text between its client-side form and the form in which it is
 * handed to the database.
 *
 * <p>The database-side form is produced by encoding the text as UTF-8 and
 * then decoding those bytes as ISO-8859-1, so every char of the result
 * carries exactly one UTF-8 byte (value 0x00-0xFF). The reverse conversion
 * undoes that mapping.
 */
public class CharsetUtil {

    /** Single-byte charset used for the database-side representation. */
    private static final java.nio.charset.Charset DATABASE_CHARSET =
            java.nio.charset.StandardCharsets.ISO_8859_1;

    /** Charset used for the client-side representation. */
    private static final java.nio.charset.Charset CLIENT_CHARSET =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Converts client text into its database-side form.
     *
     * <p>Both charsets are guaranteed to exist on every Java platform, so the
     * {@code Charset}-based overloads are used; they cannot throw
     * {@code UnsupportedEncodingException}.
     *
     * @param text the client text; may be {@code null}
     * @return the UTF-8 bytes of {@code text} decoded as ISO-8859-1, or
     *         {@code null} if {@code text} is {@code null}
     */
    public static String EntryDatabase(String text) {
        if (text == null) {
            return null;
        }
        byte[] utf8Bytes = text.getBytes(CLIENT_CHARSET);
        return new String(utf8Bytes, DATABASE_CHARSET);
    }

    /**
     * Converts database-side text back into client text.
     *
     * <p>This is the inverse of {@link #EntryDatabase(String)} for strings
     * that method produced.
     *
     * @param text the database-side text; may be {@code null}
     * @return the ISO-8859-1 bytes of {@code text} decoded as UTF-8, or
     *         {@code null} if {@code text} is {@code null}
     */
    public static String FromDatabase(String text) {
        if (text == null) {
            return null;
        }
        byte[] rawBytes = text.getBytes(DATABASE_CHARSET);
        return new String(rawBytes, CLIENT_CHARSET);
    }

}

=================================================================================

package com.citi.risk.credit.rapid.infra.util;

import java.io.UnsupportedEncodingException;

/**
 * Helpers for moving text between Java strings and a database column that
 * holds 8-bit (ISO-8859-1) text whose bytes are really either Windows cp1252
 * or GBK.
 *
 * <p>All methods are null-tolerant: a {@code null} text argument is returned
 * unchanged.
 */
public class CharsetUtil {

    /** 8-bit charset used as the carrier for database text. */
    public static final String DATABASE_CHARSET = "ISO-8859-1";

    /** Default charset assumed when no GBK byte pair is found. */
    public static final String WINDOWS_CHARSET = "cp1252";

    /** Charset reported when a GBK-looking byte pair is found. */
    public static final String GBK_CHARSET = "GBK";

    // NOTE(review): these two constants are not referenced anywhere in this
    // class; presumably they bound the GBK double-byte code range for
    // external callers -- confirm before removing or changing them.
    public static final char GBK_CODEPOINT_FROM = 0x8140;
    public static final char GBK_CODEPOINT_TO = 0xFEFF;

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(CharsetUtil.class.getName());

    /**
     * Converts a string from one encoding to another by encoding it to bytes
     * with {@code fromEnc} and decoding those bytes with {@code toEnc}.
     *
     * @param text    the text to convert; may be {@code null}
     * @param fromEnc name of the charset used to turn {@code text} into bytes
     * @param toEnc   name of the charset used to turn the bytes back into text
     * @return the converted text; the original {@code text} if it is
     *         {@code null} or if either charset name is not supported
     */
    public static String convertCharset(String text, String fromEnc, String toEnc) {
        if (text == null) {
            return null;
        }
        try {
            byte[] bytes = text.getBytes(fromEnc);
            return new String(bytes, toEnc);
        } catch (UnsupportedEncodingException ignored) {
            // Deliberate best effort: an unknown charset name leaves the text untouched.
            return text;
        }
    }

    /**
     * Converts a string by encoding it with {@code fromEnc} and decoding the
     * resulting bytes with whichever charset {@link #detectGBKCharset(byte[])}
     * reports for them.
     *
     * @param text    the text to convert; may be {@code null}
     * @param fromEnc name of the charset used to turn {@code text} into bytes
     * @return the converted text; the original {@code text} if it is
     *         {@code null} or if a required charset is not supported
     */
    public static String autoConvertCharset(String text, String fromEnc) {
        if (text == null) {
            return null;
        }
        try {
            byte[] bytes = text.getBytes(fromEnc);
            // detectGBKCharset never returns null, so no separate default branch is needed.
            return new String(bytes, detectGBKCharset(bytes));
        } catch (UnsupportedEncodingException ignored) {
            // Deliberate best effort: an unknown charset name leaves the text untouched.
            return text;
        }
    }

    /**
     * Reports the most likely encoding for an array of bytes. Only GBK is
     * actively detected; everything else is reported as Windows cp1252.
     *
     * @param bytes the raw bytes to inspect; may be {@code null} or empty
     * @return {@link #GBK_CHARSET} if any two adjacent bytes look like a GBK
     *         double-byte character, otherwise {@link #WINDOWS_CHARSET};
     *         never {@code null}
     */
    public static String detectGBKCharset(byte[] bytes) {
        return containsGbkPair(bytes) ? GBK_CHARSET : WINDOWS_CHARSET;
    }

    /**
     * Returns whether any two adjacent bytes fall into the GBK lead/trail
     * byte ranges.
     */
    private static boolean containsGbkPair(byte[] bytes) {
        if (bytes == null) {
            return false;
        }
        /* The full range of GBK codes is (from Wikipedia):
         *   First byte ("lead byte") 0x81 to 0xfe
         *     (or 0xa1 to 0xf9 for non-user-defined characters)
         *   Second byte 0x40 to 0xfe
         *
         * The lead-byte range overlaps with common windows characters such
         * as the ms office quote characters, so cp1252 text can produce
         * false positives.
         *
         * NOTE(review): the original comment here said only the
         * non-user-defined lead range (0xa1-0xf9) would be checked, but the
         * code has always accepted the full 0x81-0xfe range. The behaviour
         * is kept as-is; confirm which of the two is intended.
         */
        for (int i = 1; i < bytes.length; i++) {
            // Mask to unsigned so the ranges read exactly like the GBK tables.
            int lead = bytes[i - 1] & 0xFF;
            int trail = bytes[i] & 0xFF;

            boolean leadInRange = lead >= 0x81 && lead <= 0xFE;
            boolean trailInRange = (trail >= 0x40 && trail <= 0x7E)
                    || (trail >= 0x80 && trail <= 0xFE);

            if (leadInRange && trailInRange) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts client text (a Java string) to database format.
     *
     * <p>Database text is stored in 8-bit latin encoding, but its bytes
     * actually represent one of two encodings: Windows cp1252 or GBK. The
     * client text is first encoded as GBK; if the resulting bytes contain a
     * GBK double-byte pattern (see {@link #detectGBKCharset(byte[])}) the GBK
     * bytes are stored, otherwise the cp1252 bytes are stored.
     *
     * @param clientText the text to encode; may be {@code null}
     * @return the database representation of {@code clientText}, or
     *         {@code null} if {@code clientText} is {@code null}
     */
    public static String databaseEncode(String clientText) {
        if (clientText == null) {
            return null;
        }

        // Default encoding is windows cp1252.
        String fromEnc = WINDOWS_CHARSET;
        try {
            fromEnc = detectGBKCharset(clientText.getBytes(GBK_CHARSET));
        } catch (UnsupportedEncodingException e) {
            // GBK is not guaranteed on every JVM; keep the cp1252 default but
            // make the degradation visible instead of dumping a stack trace.
            LOG.log(java.util.logging.Level.WARNING,
                    GBK_CHARSET + " charset is not supported; encoding with "
                            + WINDOWS_CHARSET + " instead", e);
        }

        return convertCharset(clientText, fromEnc, DATABASE_CHARSET);
    }

    /**
     * Converts database text into a standard Java string for the client to
     * display.
     *
     * <p>Database text is stored in 8-bit latin encoding, but its bytes
     * actually represent either Windows cp1252 or GBK; the encoding is chosen
     * per string by {@link #detectGBKCharset(byte[])}.
     *
     * @param dbText the text as read from the database; may be {@code null}
     * @return the decoded text, or {@code null} if {@code dbText} is
     *         {@code null}
     */
    public static String databaseDecode(String dbText) {
        return autoConvertCharset(dbText, DATABASE_CHARSET);
    }

}