获取文件编码
来源:互联网 发布:简单c语言程序代码 编辑:程序博客网 时间:2024/06/05 20:50
Java: How to auto-detect a file’s encoding
Well, I don’t know if this is the best solution, but we can test the file against several CharsetDecoders and see whether any of them reports no errors. Here is a class implementing this behaviour. (Note: the code below opens and reads the file and tests it against one decoder until EOF is reached; if an error occurs, it proceeds to the next decoder, and so on. So if you specify a large number of charsets to test, or test large files, it will be slow.)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import
java.io.BufferedInputStream;
import
java.io.File;
import
java.io.FileInputStream;
import
java.io.FileNotFoundException;
import
java.io.IOException;
import
java.io.InputStreamReader;
import
java.nio.ByteBuffer;
import
java.nio.charset.CharacterCodingException;
import
java.nio.charset.Charset;
import
java.nio.charset.CharsetDecoder;
/**
 * Guesses the character encoding of a file by trial decoding.
 *
 * <p>Candidate charsets are tried in the order supplied. A candidate is accepted only when a
 * strict decoder for it consumes the <em>entire</em> file without reporting malformed or
 * unmappable input. Because single-byte charsets such as ISO-8859-1 accept almost any byte
 * sequence, list the stricter candidates (e.g. UTF-8) first.
 *
 * <p>Every candidate causes the file to be read from the start, so detection cost grows with
 * both the file size and the number of candidates.
 *
 * @author Georgios Migdos
 */
public class CharsetDetector {

    /** Size of the scratch buffer that receives (and discards) decoded characters. */
    private static final int BUFFER_SIZE = 512;

    /**
     * Returns the first charset in {@code charsets} that decodes the whole file without error.
     *
     * @param f        the file to inspect; a {@code null}, missing or unreadable file matches
     *                 nothing
     * @param charsets candidate charset names, in priority order
     * @return the first matching charset, or {@code null} if no candidate decodes the file
     *         (an empty file is valid in every charset, so it matches the first candidate)
     * @throws java.nio.charset.IllegalCharsetNameException if a name is syntactically illegal
     * @throws java.nio.charset.UnsupportedCharsetException if a name is unknown to this JVM
     */
    public Charset detectCharset(File f, String[] charsets) {
        for (String charsetName : charsets) {
            Charset charset = detectCharset(f, Charset.forName(charsetName));
            if (charset != null) {
                return charset;
            }
        }
        return null;
    }

    /**
     * Checks whether the complete content of {@code f} is valid in {@code charset}.
     *
     * @return {@code charset} if the whole file decodes cleanly, otherwise {@code null}
     */
    private Charset detectCharset(File f, Charset charset) {
        if (f == null) {
            return null;
        }
        // A decoder obtained from newDecoder() REPORTs malformed/unmappable input by default,
        // so a reader built on it throws CharacterCodingException instead of substituting
        // replacement characters.
        CharsetDecoder decoder = charset.newDecoder();
        try (InputStreamReader reader = new InputStreamReader(new FileInputStream(f), decoder)) {
            // Streaming through the reader keeps decoder state across buffer refills, so a
            // multi-byte sequence that straddles an internal buffer boundary is handled
            // correctly and only the bytes actually read are ever decoded.
            char[] sink = new char[BUFFER_SIZE];
            while (reader.read(sink) != -1) {
                // Decoded characters are discarded; only the absence of errors matters.
            }
            return charset;
        } catch (CharacterCodingException e) {
            // The file contains bytes that are not valid in this charset.
            return null;
        } catch (IOException | SecurityException e) {
            // Best effort: a missing or unreadable file simply matches no charset.
            return null;
        }
    }

    /**
     * Demo entry point: detects the charset of a file and prints its decoded content.
     *
     * @param args optional; {@code args[0]} is the file to inspect (defaults to
     *             {@code example.txt} in the working directory)
     */
    public static void main(String[] args) {
        File f = new File(args.length > 0 ? args[0] : "example.txt");
        String[] charsetsToBeTested = {"UTF-8", "windows-1253", "ISO-8859-7", "GBK"};

        CharsetDetector cd = new CharsetDetector();
        Charset charset = cd.detectCharset(f, charsetsToBeTested);
        if (charset == null) {
            System.out.println("Unrecognized charset.");
            return;
        }

        try (InputStreamReader reader = new InputStreamReader(new FileInputStream(f), charset)) {
            int c;
            while ((c = reader.read()) != -1) {
                System.out.print((char) c);
            }
        } catch (IOException ioe) {
            System.err.println("Could not read " + f + ": " + ioe);
        }
    }
}
阅读全文
0 0
- 获取文件编码
- 获取文件的编码
- JAVA获取文件编码
- 获取文件编码
- java获取文件编码
- delphi获取文件编码
- 获取文件编码chardet
- 获取文件编码
- java获取文件编码
- 简单自动获取文件编码
- 获取文件的编码格式
- 获取远程文件编码问题
- 获取文件的编码格式
- java获取文件编码实现
- Java 获取文件默认编码
- 获取文件采用什么编码
- java 获取文件编码格式
- JAVA上传文件获取文件编码格式
- 388. Longest Absolute File Path
- 使用POI读写word doc文件
- 微信小程序商品循环列表点击弹出当前的商品信息
- windows下通过wheel安装whl文件
- Page.OnPreLoad 方法对系统注入漏洞进行弥补
- 获取文件编码
- 子线程循环10次 主线程循环100次 然后如此反复50次
- 第六次C++实验
- Git检出项目--Idea中使用Git
- ElasticsearchCRUD使用(十九)【索引热身】
- jsp-js-java:json数据传值
- 92-ICMP 协议(基础)
- Android:高仿百度外卖、美团、淘点点二级联动效果!
- mysql emoji 乱码问题