Java:判断无 BOM 的 UTF-8 文件编码

来源:互联网 发布:淘宝网李宁运动服 编辑:程序博客网 时间:2024/06/10 16:03


/** * @Comments :获取文件编码格式 * @param fileName * @return */private static String getCharset(File fileName) {BufferedInputStream bin;int bom = 0;String str = " ";String str2 = "";try {bin = new BufferedInputStream(new FileInputStream(fileName));bom = (bin.read() << 8) + bin.read();// 获取两个字节内容,如果文件无BOM信息,则通过判断字的字节长度区分编码格式byte bs[] = new byte[10];while(str.matches("\\s+\\w*")){bin.read(bs);str = new String(bs, "UTF-8");}str2 = new String(bs, "GBK");} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}String code = null;// 有BOMswitch (bom) {case 0xefbb:code = "UTF-8";break;case 0xfffe:code = "Unicode";break;case 0xfeff:code = "UTF-16BE";break;default:// 无BOMif (str.length() <=str2.length()) {code = "UTF-8";} else {code = "GBK";}}return code;}


0 0