【编码与乱码】(04)---输出时的编码与乱码

来源：互联网发布：福彩3d2017年开奖数据编辑：程序博客网时间：2024/05/15 12:35

package example.encoding;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Set;
import java.util.SortedMap;

/**
 * <pre>
 * OutputEncodingTest is a small experiment on Java output encoding. It mainly
 * contains two parts:
 *  1. Writing through FileWriter, with or without a requested character encoding
 *  2. Writing through OutputStreamWriter, with or without a requested character encoding
 * </pre>
 *
 * Each run writes one text file per writer/encoding combination into a
 * {@code Test_Result_Of_<user.language>} folder below
 * {@code <user.dir>/src/example/encoding/}.
 *
 * @author Paul Lin
 * @version 1.0
 */
public class OutputEncodingTest {

    /** Sample text containing both ASCII and Chinese characters. */
    private static final String WORD = "Hello world! 中国";

    private static final String ENCODING_EN = "ISO-8859-1";

    private static final String ENCODING_CN = "GB2312";

    private static final String ENCODING_UTF = "UTF-8";

    /**
     * Value of the {@code file.encoding} property captured at class load time.
     * Falls back to the default charset name so it is never {@code null}
     * ({@link System#setProperty(String, String)} rejects null values).
     */
    private static final String DEFAULT_SYSTEM_ENCODING = System.getProperty(
            "file.encoding", Charset.defaultCharset().name());

    /**
     * Runs both experiments: the FileWriter one and the OutputStreamWriter one.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        OutputEncodingTest tester = new OutputEncodingTest();
        tester.testFileWriter();
        tester.testOutputStreamWriter();
    }

    /**
     * Writes the sample text through {@link FileWriter}, once with the default
     * encoding and once for each of ISO-8859-1, GB2312 and UTF-8.
     */
    public void testFileWriter() {
        // Create test result folder
        String resultFolder = createResultFolder(
                System.getProperty("user.language"), getBasePath());

        // With default platform encoding
        writeByFileWriter(WORD, resultFolder);

        // With the file.encoding system property changed at run time
        writeByFileWriter(WORD, ENCODING_EN, resultFolder);
        writeByFileWriter(WORD, ENCODING_CN, resultFolder);
        writeByFileWriter(WORD, ENCODING_UTF, resultFolder);
    }

    /**
     * Writes the sample text through {@link OutputStreamWriter}, once with the
     * default encoding and once for each of ISO-8859-1, GB2312 and UTF-8.
     */
    public void testOutputStreamWriter() {
        // Create test result folder
        String resultFolder = createResultFolder(
                System.getProperty("user.language"), getBasePath());

        // With default platform encoding
        writeByOutputStreamWriter(WORD, resultFolder);

        // With an explicitly requested encoding
        writeByOutputStreamWriter(WORD, ENCODING_EN, resultFolder);
        writeByOutputStreamWriter(WORD, ENCODING_CN, resultFolder);
        writeByOutputStreamWriter(WORD, ENCODING_UTF, resultFolder);
    }

    /**
     * Prints every charset known to this JVM as
     * {@code canonical name - display name - can encode}.
     */
    public void printAvailableCharset() {
        SortedMap<String, Charset> charsets = Charset.availableCharsets();
        System.out.println("\n<<<< Canonical name -- Display name -- "
                + " Can encode >>>>\n");
        for (String key : charsets.keySet()) {
            Charset charset = charsets.get(key);
            System.out.println(key + " - " + charset.displayName() + " - "
                    + charset.canEncode());
        }
    }

    /**
     * Writes the content through FileWriter using the default system encoding.
     *
     * @param content the text to write
     * @param destination the folder the result file is written to
     */
    private void writeByFileWriter(String content, String destination) {
        System.out.println("Using default system encoding: "
                + DEFAULT_SYSTEM_ENCODING);
        writeByFileWriter(content, DEFAULT_SYSTEM_ENCODING, destination);
    }

    /**
     * Writes the content through FileWriter after setting the
     * {@code file.encoding} system property to the requested encoding.
     *
     * <p>{@code FileWriter(String)} takes no charset argument; changing the
     * property here is the experiment itself, not a supported way of choosing
     * the encoding. The property is always restored afterwards.
     *
     * @param content the text to write
     * @param encoding the encoding to put into {@code file.encoding}; also used
     *            in the result file name
     * @param destination the folder the result file is written to
     */
    private void writeByFileWriter(String content, String encoding,
            String destination) {
        printDebugInformation("FileWriter", encoding, content);

        String file = returnFileName(destination, "write_by_filewriter_",
                encoding, ".txt");
        try {
            // Change the platform encoding property only when it differs
            if (!DEFAULT_SYSTEM_ENCODING.equalsIgnoreCase(encoding)) {
                System.setProperty("file.encoding", encoding);
            }
            writeWithFileWriter(file, content);
        } finally {
            // Restore the property even if something unexpected was thrown
            resetDefaultSystemEncoding();
        }
    }

    /**
     * Writes the content to the file with a buffered FileWriter, closing the
     * writer in every case and reporting (not propagating) I/O failures.
     *
     * @param file the path of the file to write
     * @param content the text to write
     */
    private void writeWithFileWriter(String file, String content) {
        try (Writer writer = new BufferedWriter(new FileWriter(file))) {
            writer.write(content);
        } catch (IOException ioe) {
            System.err.println("Failed to write " + file
                    + " with FileWriter: " + ioe);
        }
    }

    /**
     * Writes the content through OutputStreamWriter using the default system
     * encoding.
     *
     * @param content the text to write
     * @param destination the folder the result file is written to
     */
    private void writeByOutputStreamWriter(String content, String destination) {
        System.out.println("Using default system encoding: "
                + DEFAULT_SYSTEM_ENCODING);
        writeByOutputStreamWriter(content, DEFAULT_SYSTEM_ENCODING, destination);
    }

    /**
     * Writes the content through an OutputStreamWriter created with the
     * requested encoding. I/O failures are reported on standard error and do
     * not abort the remaining experiments.
     *
     * @param content the text to write
     * @param encoding the charset name passed to OutputStreamWriter; also used
     *            in the result file name
     * @param destination the folder the result file is written to
     */
    private void writeByOutputStreamWriter(String content, String encoding,
            String destination) {
        printDebugInformation("OutputStreamWriter", encoding, content);

        String file = returnFileName(destination,
                "write_by_outputStreamWriter_", encoding, ".txt");

        // The stream is a separate resource so it is closed even when the
        // charset name is rejected. No PrintWriter is used because it would
        // hide write errors instead of throwing them.
        try (FileOutputStream out = new FileOutputStream(file);
                Writer writer = new BufferedWriter(
                        new OutputStreamWriter(out, encoding))) {
            writer.write(content);
        } catch (FileNotFoundException fnfe) {
            System.err.println("Cannot open " + file + " for writing: " + fnfe);
        } catch (UnsupportedEncodingException uee) {
            System.err.println("Unsupported encoding " + encoding + ": " + uee);
        } catch (IOException ioe) {
            System.err.println("Failed to write " + file
                    + " with OutputStreamWriter: " + ioe);
        }
    }

    /**
     * Builds the base path {@code <user.dir>/src/example/encoding/}.
     *
     * @return the base path, ending with a slash
     */
    private String getBasePath() {
        String dir = System.getProperty("user.dir");
        StringBuilder finalPath = new StringBuilder(dir);
        if (!dir.endsWith("\\") && !dir.endsWith("/")) {
            finalPath.append("/");
        }
        finalPath.append("src/example/encoding/");
        return finalPath.toString();
    }

    /**
     * Concatenates base path, prefix, content and suffix into a file name,
     * inserting a slash after the base path when it has no trailing separator.
     *
     * @param basePath the folder path
     * @param prefix the file name prefix
     * @param content the middle part of the file name
     * @param subfix the file name suffix
     *
     * @return the assembled file name
     */
    private String returnFileName(String basePath, String prefix,
            String content, String subfix) {
        StringBuilder name = new StringBuilder(basePath);
        if (!basePath.endsWith("\\") && !basePath.endsWith("/")) {
            name.append("/");
        }
        name.append(prefix).append(content).append(subfix);
        return name.toString();
    }

    /**
     * Ensures the folder {@code <fullPath>/Test_Result_Of_<platform>} exists,
     * creating missing parent folders as well.
     *
     * @param platform the suffix of the folder name
     * @param fullPath the parent path
     *
     * @return the absolute path of the result folder
     */
    private String createResultFolder(String platform, String fullPath) {
        StringBuilder resultFolder = new StringBuilder(fullPath);
        if (!fullPath.endsWith("\\") && !fullPath.endsWith("/")) {
            resultFolder.append("/");
        }
        resultFolder.append("Test_Result_Of_").append(platform);

        File folder = new File(resultFolder.toString());
        // mkdirs() rather than mkdir(): the parent folders may not exist yet
        if (!folder.isDirectory() && !folder.mkdirs()) {
            System.err.println("Could not create result folder: "
                    + folder.getAbsolutePath());
        }
        return folder.getAbsolutePath();
    }

    /**
     * Prints a banner naming the writer and encoding under test, followed by
     * the original string.
     *
     * @param writerName the writer name
     * @param encoding the encoding
     * @param content the original string
     */
    private void printDebugInformation(String writerName, String encoding,
            String content) {
        StringBuilder msg = new StringBuilder();
        msg.append("\n<<<<----------------------------------");
        msg.append(" Test written by ").append(writerName);
        msg.append(" with encoding ").append(encoding);
        msg.append(" ---------------------------------->>>>\n");
        msg.append(" \nOriginal string: ").append(content).append("\n");
        System.out.println(msg.toString());
    }

    /**
     * Restores the {@code file.encoding} system property to the value captured
     * when this class was loaded.
     */
    private void resetDefaultSystemEncoding() {
        System.setProperty("file.encoding", DEFAULT_SYSTEM_ENCODING);
    }
}

【1】中文平台情况下，测试结果如下：

1.如果采用FileWriter，并指定GBK编码：编码后字符长度为15，可以正常保存和读取
2.如果采用FileWriter，并指定UTF-8编码：编码后字节长度为16，可以正常保存和读取
3.如果采用FileWriter，并指定ISO-8859-1编码：编码后字节长度为17，可以正常保存和读取

4.如果采用OutputStreamWriter，并指定GBK编码：编码后字符长度为15，可以正常保存和读取
5.如果采用OutputStreamWriter，并指定UTF-8编码：编码后字节长度为16，可以正常保存和读取
6.如果采用OutputStreamWriter，并指定ISO-8859-1编码：编码后字节长度为17，变成?

【2】英文平台情况下，测试结果如下：

1.如果采用FileWriter，并指定GBK编码：编码后字符长度为15，变成?
2.如果采用FileWriter，并指定UTF-8编码：编码后字节长度为16，变成?
3.如果采用FileWriter，并指定ISO-8859-1编码：编码后字节长度为17，变成?

4.如果采用OutputStreamWriter，并指定GBK编码：编码后字符长度为15，可以正常保存和读取
5.如果采用OutputStreamWriter，并指定UTF-8编码：编码后字节长度为16，可以正常保存和读取
6.如果采用OutputStreamWriter，并指定ISO-8859-1编码：编码后字节长度为17，变成?

【结论】

①在中文平台下，如果使用FileWriter，不论你如何设置字符集都不会起作用。因为它采用的是默认的系统字符集。即便你设置了System.setProperty("file.encoding", "ISO-8859-1")，或者在运行时给予参数-Dfile.encoding=UTF-8都不会起作用。你会发现它最终还是都以"GB2312"或者"GBK"的方式保存。

在中文平台下，如果使用OutputStreamWriter，则在后台写入时会把字符流转换成字节流，此时指定的编码字符集就起作用了。可以看到在指定GBK、UTF-8的情况下中文可以正常地保存和读取，同时文件按照我们给定的方式保存了。而对于ISO-8859-1则变成了?，这再次证明了采用ISO-8859-1是不能保存中文的，而且会因为中文编码在ISO-8859-1的编码中找不到对应的字符而默认转换成?。

②在英文平台下，如果使用FileWriter，不论你如何设置字符集同样都不会起作用。所有的文件都将按照ISO-8859-1的编码方式保存，毫无疑问地变成了?。在英文平台下，如果使用OutputStreamWriter，则只有当我们把字符和文件的编码方式正确设置为GBK、UTF-8的情况下，中文才能正确地保存并显示。

③通过上述的实验证明，为了确保在不同的平台下，客户端输入的中文可以被正确地解析、保存、读取。最好的办法就是使用OutputStreamWriter配合UTF-8编码。

如果不想使用UTF-8编码，那么可以考虑使用GB2312，不建议使用GBK、GB18030。因为对于某些老式的文本编辑器，甚至不支持GBK、GB18030的编码，但是对于GB2312则是一定支持的。因为前两者都不是国标但后者是。

④关于String的getBytes()，getBytes(encoding)和new String(bytes, encoding)这三个方法，非常值得注意：

A.getBytes()：使用平台默认的编码方式(通过file.encoding属性获取)来将字符串转换成byte[]。得到的是字符串最原始的字节编码值。

B.getBytes(NAME_OF_CHARSET)：使用指定的编码方式将字符串转换成byte[]，如果想要得到正确的字节数组，程序员必须给出正确的NAME_OF_CHARSET，否则就不会得到正确的结果。

C.new String(bytes, encoding)：如果我们在客户端使用UTF-8编码的JSP页面发出请求，浏览器编码后的UTF-8字节会以ISO-8859-1的形式传递到服务器端。所以要得到经HTTP协议传输的原始字节，我们需要先调用getBytes("ISO-8859-1")得到原始的字节，但由于我们客户端的原始编码是UTF-8，如果继续按照ISO-8859-1解码，那么得到的将不是一个中文字符，而是3个乱码的字符。所以我们需要再次调用new String(bytes,"UTF-8")，将字节数组按照UTF-8的格式，每3个一组进行解码，才能还原为客户端的原始字符。

D.String的getBytes()、getBytes(NAME_OF_CHARSET)方法都是比较微妙的方法，原则上：传输时采用的是什么编码，我们就需要按照这种编码得到字节。new String(bytes, NAME_OF_CHARSET)则更加需要小心，原则上：客户端采用的是什么编码，那么这里的NAME_OF_CHARSET就必须和客户端保持一致。

   例如JSP页面是GBK，那么我们接收页面传递而来的参数时就必须使用new String(parameter.getBytes("ISO-8859-1"), "GBK");如果使用了错误的解码方式，如使用了UTF-8，那么得到的很有可能就是乱码了。

   也就是说：

GBK--->ISO-8859-1--->GBK、

UTF-8--->ISO-8859-1--->UTF-8

转换过程是没有问题的。但是GBK--->ISO-8859-1--->UTF-8、UTF-8--->ISO-8859-1--->GBK的字节直接转码则可能导致乱码，需要另外的转换过程。

记住：

谨慎地使用getBytes(NAME_OF_CHARSET)和new String(bytes, NAME_OF_CHARSET)，除非你很清楚的知道原始的字符编码和传输协议使用的编码。

推荐使用基于服务器的配置、过滤器设置request/response的characterEncoding、content type属性。还有就是JSP页面的pageEncoding属性、HTML meta元素的content type属性。尽量避免频繁的在代码中进行字符串转码，既降低了效率又增加了风险。