Well, I don’t know if this is the best solution, but we can test the file against various CharsetDecoders and see whether any of them reports no errors. Here is a class implementing this behaviour. Note: the code below opens and reads the file and tests it against a decoder until EOF is reached; if an error occurs, it proceeds to the next decoder, and so on. If you specify a large number of charsets to be tested, or test large files, it will be slow:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

 
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
 
/**
 *
 * @author Georgios Migdos
 */
publicclass CharsetDetector {
 
    publicCharset detectCharset(File f, String[] charsets) {
 
        Charset charset = null;
 
        for(String charsetName : charsets) {
            charset = detectCharset(f, Charset.forName(charsetName));
            if(charset != null) {
                break;
            }
        }
 
        returncharset;
    }
 
    privateCharset detectCharset(File f, Charset charset) {
        try{
            BufferedInputStream input = newBufferedInputStream(newFileInputStream(f));
 
            CharsetDecoder decoder = charset.newDecoder();
            decoder.reset();
 
            byte[] buffer = newbyte[512];
            booleanidentified = false;
            while((input.read(buffer) != -1) && (!identified)) {
                identified = identify(buffer, decoder);
            }
 
            input.close();
 
            if(identified) {
                returncharset;
            }else{
                returnnull;
            }
 
        }catch(Exception e) {
            returnnull;
        }
    }
 
    privateboolean identify(byte[] bytes, CharsetDecoder decoder) {
        try{
            decoder.decode(ByteBuffer.wrap(bytes));
        }catch(CharacterCodingException e) {
            returnfalse;
        }
        returntrue;
    }
 
    publicstatic void main(String[] args) {
        File f = newFile("example.txt");
 
        String[] charsetsToBeTested = {"UTF-8","windows-1253","ISO-8859-7","GBK"};
 
        CharsetDetector cd = newCharsetDetector();
        Charset charset = cd.detectCharset(f, charsetsToBeTested);
 
        if(charset != null) {
            try{
                InputStreamReader reader = newInputStreamReader(newFileInputStream(f), charset);
                intc = 0;
                while((c = reader.read()) != -1) {
                    System.out.print((char)c);
                }
                reader.close();
            }catch(FileNotFoundException fnfe) {
                fnfe.printStackTrace();
            }catch(IOException ioe){
                ioe.printStackTrace();
            }
 
        }else{
            System.out.println("Unrecognized charset.");
        }
    }