异常:org.dom4j.DocumentException: 2 字节的 UTF-8 序列的字节 2 无效(Invalid byte 2 of 2-byte UTF-8 sequence)

来源:互联网 发布:热分析软件哪个好 编辑:程序博客网 时间:2024/05/22 17:12
package youling.studio;


import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * Created by lenovo on 2014/10/20.
 */
public class SougouNews {


    public  static void main(String[] args) throws Exception {
        SougouNews s = new SougouNews();
        Document doc = s.parse("C:\\Users\\lenovo\\Desktop\\news_sohusite_xml.smarty.xml");
        List<Element> eleDoc = s.getAllElementsByTag(doc,"doc");
        for (Element ele:eleDoc){
            ele.elementIterator("contenttitle");
            Element title = ele.element("contenttitle");
            System.out.println(title.getText());
        }




    }


    /**
     * 得到指定标签下的指定标签名的所有标签
     * @param document
     * @param tag
     */
    public List<Element> getAllElementsByTag(Document document,String tag){
        Element root = document.getRootElement();
        List<Element> list = new ArrayList<Element>();
        for(Iterator<Element> i = root.elementIterator(tag);i.hasNext();){
            Element ele = i.next();
            list.add(ele);
        }
        return list;
    }


    public Document parse(String filename) throws Exception {
        SAXReader reader = new SAXReader();
        Document document = reader.read(new BufferedReader(new InputStreamReader(new FileInputStream(new File(filename)),"UTF-8")));
        return  document;
    }

}



此句话是关键:

Document document = reader.read(new BufferedReader(new InputStreamReader(new FileInputStream(new File(filename)),"UTF-8")));

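这就是一个文件编码问题:先用 InputStreamReader 显式指定按 UTF-8 把文件解码成字符流,再交给 SAXReader,而不是让 XML 解析器直接读取字节流并自行判断编码。如果文件的实际编码不是 UTF-8(例如 GBK),应把 "UTF-8" 换成文件的真实编码,否则读出的内容会是乱码。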

0 0