将文件内含有的特殊字符还原

来源:互联网 发布:崔永元 转基因 知乎 编辑:程序博客网 时间:2024/06/06 07:20

如下文件内容:


这里有特殊字符:\n 、\t 、\u4e0a 、\/

我要做的事,就是恢复其特殊字符的作用(而不是打印被转义后的效果)


直观地看,很容易:直接替换不就行了?

// NOTE: this first attempt does not do what is intended. replaceAll() treats its first
// argument as a regular expression, and the Java literals below compile to the regexes
// \n, \t and \/ - which match a real line-feed character, a real tab character and a
// plain "/" respectively, not the two-character sequences (backslash + letter) that are
// stored in the file. The backslash-prefixed text in the file is therefore never matched.
line = line.replaceAll("\\n", "\r\n" );line = line.replaceAll("\\t", "\t" );line = line.replaceAll("\\/", "/" );
但是,这是错误的。新生成的文件没有任何改变。


这里有个问题以前没弄清楚:文本文件中的 \n 是"反斜杠"和"n"两个普通字符,读入后字符串里仍然是这两个字符,写成 Java 字符串字面量就是 "\\n";而 replaceAll 的第一个参数是正则表达式,反斜杠在正则中还要再转义一次,所以最终要写成 "\\\\n"



因而正确的替换方法为:

line = line.replaceAll("\\\\n", "\r\n" );line = line.replaceAll("\\\\t", "\t" );line = line.replaceAll("\\\\/", "/" );


接下来就是处理 Unicode码,将其还原

来源:http://www.cnblogs.com/yuxuan/archive/2011/08/02/2124904.html

/** *//****************************************************** 功能介绍:将unicode字符串转为汉字* 输入参数:源unicode字符串* 输出参数:转换后的字符串*****************************************************/static String decodeUnicode( final String dataStr ) {int start = 0;int end = 0;final StringBuffer buffer = new StringBuffer();while( start > -1 ) {end = dataStr.indexOf( "\\\\u", start + 2 );String charStr = "";if( end == -1 ) {charStr = dataStr.substring( start + 2, dataStr.length() );} else {charStr = dataStr.substring( start + 2, end);}char letter = (char) Integer.parseInt( charStr, 16 ); // 16进制parse整形字符串。buffer.append( new Character( letter ).toString() );start = end;}return buffer.toString();}


有了 decodeUnicode 方法,接下来只需要用正则表达式匹配文件中形如 \uxxxx(xxxx 为四位十六进制数)的转义序列,并逐个转换即可:

/**
 * Replaces every Unicode escape in {@code s} (a backslash, the letter 'u' and exactly four
 * hexadecimal digits, matched case-insensitively) with the character it denotes. All
 * other text is copied through unchanged.
 *
 * @param s the text to scan
 * @return {@code s} with every escape decoded
 * @throws RuntimeException if scanning or decoding fails (for example when {@code s} is
 *         null); the original failure is attached as the cause
 */
static String replace( String s ){
    try {
        // Only hex digits are accepted after the prefix: the former [0-9a-z] class also
        // matched letters such as 'z', which then blew up inside Integer.parseInt.
        Pattern regex = Pattern.compile("\\\\u[0-9a-f]{4}", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        Matcher matcher = regex.matcher(s);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // appendReplacement() gives '$' and backslash a special meaning in the
            // replacement text, so the decoded character must be quoted; otherwise an
            // escape that decodes to '$' or to a backslash throws or is misinterpreted.
            String decoded = decodeUnicode( matcher.group() );
            matcher.appendReplacement(sb, Matcher.quoteReplacement( decoded ));
        }
        matcher.appendTail(sb);
        return sb.toString();
    } catch (RuntimeException ex) {
        // Keep the original message but preserve the cause for diagnosis.
        throw new RuntimeException( "Something error.", ex );
    }
}


总的转换代码:

static void readToWrite( File file ){BufferedReader bufReader = null;BufferedWriter bufWriter = null;try {bufReader = new BufferedReader( new FileReader(file) );bufWriter = buildWriter( file );String line = null;while( (line = bufReader.readLine()) != null ){line = line.replaceAll("\\\\n", "\r\n" );line = line.replaceAll("\\\\t", "\t" );line = line.replaceAll("\\\\/", "/" );line = replace( line );bufWriter.write( line );bufWriter.newLine();}} catch (IOException e) {e.printStackTrace();}finally{if( bufReader != null ){try {bufReader.close();} catch (IOException e) {e.printStackTrace();}bufReader = null;}if( bufWriter != null ){try {bufWriter.close();} catch (IOException e) {e.printStackTrace();}bufWriter = null;}}}static BufferedWriter buildWriter( File file ){BufferedWriter bufWriter = null;try {String fullName = file.getCanonicalPath();int splitPath = fullName.lastIndexOf( "\\" );String path = fullName.substring( 0, splitPath );String name = file.getName().replaceAll("\\.txt", "@\\.txt" );bufWriter = new BufferedWriter( new FileWriter( path + "\\" + name ) );return bufWriter;} catch (IOException e) {e.printStackTrace();}return null;}static String replace( String s ){try {Pattern regex = Pattern.compile("\\\\u[0-9a-z]{4}", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);Matcher matcher = regex.matcher(s);StringBuffer sb = new StringBuffer();while (matcher.find()) {matcher.appendReplacement(sb, decodeUnicode( matcher.group()) );}matcher.appendTail(sb);return sb.toString();} catch (Exception ex) {throw new RuntimeException( "Something error." 
);}}/** *//****************************************************** 功能介绍:将unicode字符串转为汉字* 输入参数:源unicode字符串* 输出参数:转换后的字符串*****************************************************/static String decodeUnicode( final String dataStr ) {int start = 0;int end = 0;final StringBuffer buffer = new StringBuffer();while( start > -1 ) {end = dataStr.indexOf( "\\\\u", start + 2 );String charStr = "";if( end == -1 ) {charStr = dataStr.substring( start + 2, dataStr.length() );} else {charStr = dataStr.substring( start + 2, end);}char letter = (char) Integer.parseInt( charStr, 16 ); // 16进制parse整形字符串。buffer.append( new Character( letter ).toString() );start = end;}return buffer.toString();}


============================================================


 

 /**将中文转为unicode 及转回中文函数转为unicode */public static void writeUnicode(final DataOutputStream out, final String value) {try {final String unicode = gbEncoding( value );final byte[] data = unicode.getBytes();final int dataLength = data.length;System.out.println( "Data Length is: " + dataLength );System.out.println( "Data is: " + value );out.writeInt( dataLength ); //先写出字符串的长度out.write( data, 0, dataLength ); //然后写出转化后的字符串} catch (IOException e) {}}public static String gbEncoding( final String gbString ) {char[] utfBytes = gbString.toCharArray();String unicodeBytes = "";for( int byteIndex = 0; byteIndex < utfBytes.length; byteIndex ++ ) {String hexB = Integer.toHexString( utfBytes[ byteIndex ] );if( hexB.length() <= 2 ) {hexB = "00" + hexB;}unicodeBytes = unicodeBytes + "\\\\u" + hexB;}System.out.println( "unicodeBytes is: " + unicodeBytes );return unicodeBytes;}