Java解析rdf和xml文件以及处理excel

来源：互联网　发布：域名被注册有什么坏处　编辑：程序博客网　时间：2024/06/05 18:37

一.Java解析rdf文件

1.下载jena库

首先，从这里下载 jena 包 apache-jena-2.7.*.tar.gz（我用的是2.7）。解压。
打开 Eclipse，新建一个Java Project。
右键点击项目->properties->Java Build Path -> libraries。将解压后 lib 目录下的 jar 文件添加到build path 中。
OK。现在可以在项目里使用 jena 了。

2.Jena中的数据结构

在 Jena 中，资源用 Resource 类来表示，其属性用 Property 类来表示，而整体模型用 Model 类来表示。一个 Model 对象可以包含多个资源，每个资源的信息由若干条陈述（Statement）描述，每条陈述由主语 subject、谓语 predicate、宾语 object 三部分组成（下面是一个记录车辆移动数据的rdf文件）。

其中整个rdf文件可以看做是一个model，每个rdf标签可以看做是一个subject，每个rdf标签下的obs:hasxxx可以看做是predicate，每个<obs>xxx</obs>中间的xxx看做是object。整个解析过程类似xml。

3.使用Jena

//获取model

public static Model readRDF(String fileName) {Model model = ModelFactory.createDefaultModel();InputStream in = FileManager.get().open(fileName);if (in == null) {throw new IllegalArgumentException("File: " + fileName+ " not found");}// 读取RDF/XML 文件return model.read(in, com.hp.hpl.jena.vocabulary.RDF.getURI(), "RDF/XML-ABBREV");// model.write(System.out);}

/** * 获取经纬度以及发送者ID * @param model 数据 * @param list  存储结果 */public static void getWantedItems(Model model,Map<String, List<Map<String, String>>> senderMap, HashSet<String> nameSet) {ResIterator subjects = model.listSubjects();while (subjects.hasNext()) {Resource subject = subjects.next();//Property property = model.createProperty();//System.out.println(subject.getLocalName());StmtIterator properties = subject.listProperties();Map<String, String> item = new HashMap<String, String>();String sender = null;while (properties.hasNext()) {Statement stmt = properties.nextStatement();Property predicate = stmt.getPredicate();RDFNode object = stmt.getObject();String val = null;String name = predicate.getLocalName().trim();//System.out.println(name);val = object.toString().split("\\^\\^")[0];if (name.equals("hasLongitude")) {item.put("longitude", val);} else if (name.equals("hasLatitude")) {item.put("latitude", val);} else if (name.equals("hasSender")) {sender = val.trim();item.put("sender", val);nameSet.add(sender);}//System.out.println();}if (senderMap.get(sender)!=null) {senderMap.get(sender).add(item);}else {System.out.println("新车id："+sender+"创建list成功");List<Map<String, String>> list = new ArrayList<Map<String,String>>();list.add(item);senderMap.put(sender, list);}properties.close();}subjects.close();//System.out.println(list);}

二.Java解析xml

1.DOM方式

DOM模式解析XML，是把整个XML文档当成一个对象来处理，会先把整个文档读入到内存里。是基于树的结构，通常需要加载整个文档和构造DOM树，然后才能开始工作。

优缺点：解析简单，但不适合大文件

2.SAX方式

基于事件驱动的方式，适合大文件，非一次性将文件读入内存，主要是要写自己的handler。

/** * 读取xml文件，使用SAXParser解析 *  * @param uri * @param NodeName * @return */public static List<Map<String, String>> ReadXML(String uri, String NodeName) {try {// 创建一个解析XML的工厂对象SAXParserFactory parserFactory = SAXParserFactory.newInstance();// 创建一个解析XML的对象SAXParser parser = parserFactory.newSAXParser();// 创建一个解析助手类Myhandler myhandler = new Myhandler("Observation");parser.parse(uri, myhandler);return myhandler.getList();} catch (Exception e) {e.printStackTrace();} finally {}return null;}/** * 将list容器里的内容写入到新的xml文件 *  * @param vanet */public static void createXML(List<Map<String, String>> vanet) {try {// 创建工厂SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();TransformerHandler handler = factory.newTransformerHandler();Transformer info = handler.getTransformer();// 是否自动添加额外的空白info.setOutputProperty(OutputKeys.INDENT, "yes");// 设置字符编码info.setOutputProperty(OutputKeys.ENCODING, "utf-8");info.setOutputProperty(OutputKeys.VERSION, "1.0");info.setOutputProperty(OutputKeys.STANDALONE, "no");// 保存创建的saxvanet.xmlStreamResult result = new StreamResult(new FileOutputStream("f:\\xml\\map2.gpx"));handler.setResult(result);// 开始xmlhandler.startDocument();AttributesImpl impl = new AttributesImpl();impl.clear();addAttr(impl);handler.startElement("", "", "gpx", impl);for (int i = 0; i < vanet.size(); i++) {/* * <wpt lat="25.55" lon="99.1666666666667"> * <ele>123</ele> * <name>矿1</name> * <desc>test</desc> * <sym>unistrong:104</sym> * </wpt> */Map<String, String> map = vanet.get(i);Vehicle vh = setVehicle(map);// 生成<wpt lat="xx" lon="xx">impl.clear(); // 清空属性impl.addAttribute("", "", "lat", "", vh.getLatitude());impl.addAttribute("", "", "lon", "", vh.getLongitude());handler.startElement("", "", "wpt", impl);// 生成<name>xx</name>元素impl.clear();handler.startElement("", "", "name", impl);String name = vh.getSender();//System.out.println("name:"+name);handler.characters(name.toCharArray(), 0, name.length()); // 为name元素添加文本handler.endElement("", "", 
"name");impl.clear();handler.endElement("", "", "wpt");}// 生成</class>handler.endElement("", "", "gpx");handler.endDocument();System.out.println("complete.................................................");} catch (Exception e) {e.printStackTrace();}}public static void addAttr(AttributesImpl impl) {impl.addAttribute("", "", "xmlns", "","http://www.topografix.com/GPX/1/1");impl.addAttribute("", "", "creator", "", "MapSource 6.5");impl.addAttribute("", "", "version", "", "1.1");impl.addAttribute("", "", "xmlns:xsi", "","http://www.w3.org/2001/XMLSchema-instance");impl.addAttribute("", "", "xsi:schemaLocation", "","http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd");System.out.println("添加属性成功");}

/**
 * SAX handler that collects the text of every {@code <Sender>} element.
 *
 * <p>Example of the input it is written for:
 * <pre>
 * &lt;Observation ID="18"&gt;
 *   &lt;Time&gt;2013-04-18T08:00:00+00:00&lt;/Time&gt;
 *   &lt;Area&gt;90268&lt;/Area&gt;
 *   &lt;Coordinates X="2521.4661" Y="6507.9541" /&gt;
 *   &lt;Velocity&gt;60&lt;/Velocity&gt;
 *   &lt;Direction&gt;169&lt;/Direction&gt;
 *   &lt;ManualInfo&gt;0&lt;/ManualInfo&gt;
 *   &lt;Sender&gt;134569370&lt;/Sender&gt;
 * &lt;/Observation&gt;
 * </pre>
 *
 * <p>After parsing, {@link #getSet()} holds the distinct sender values and
 * {@link #getList()} holds one map per record element (the element name given
 * to the constructor), keyed by {@code "Sender"}.
 */
public class Myhandler extends DefaultHandler {
    /** Distinct sender values seen so far. */
    private HashSet<String> set = null;
    /** Data of the record element currently being parsed. */
    private Map<String, String> map = null;
    /** Data of all record elements parsed so far. */
    private List<Map<String, String>> list = null;
    /** Name of the element whose text is currently being captured, or null. */
    String currentTag = null;
    /** Text of the element currently being captured. */
    String currentValue = null;
    /** Name of the element that delimits one record. */
    String nodeName = null;
    /** Accumulates text, because the parser may deliver it in several chunks. */
    private final StringBuilder text = new StringBuilder();

    /**
     * @param nodeName name of the element that delimits one record
     */
    public Myhandler(String nodeName) {
        this.nodeName = nodeName;
    }

    /** @return the distinct sender values; {@code null} before parsing starts */
    public HashSet<String> getSet() {
        return set;
    }

    /** @return one map per record element; {@code null} before parsing starts */
    public List<Map<String, String>> getList() {
        return list;
    }

    /** Called once at the start of the document; resets all collected state. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("--startDocument()--");
        list = new ArrayList<Map<String, String>>();
        set = new HashSet<String>();
        map = null;
        currentTag = null;
        currentValue = null;
        text.setLength(0);
    }

    /** Called for every opening tag; starts a record or begins capturing a sender. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        currentTag = null;
        text.setLength(0);
        if (qName.equals(nodeName)) {
            // A new record begins.
            map = new HashMap<String, String>();
            return;
        }
        if (qName.equals("Sender")) {
            // Only the sender's text is of interest.
            currentTag = qName;
        }
    }

    /** Called for character data; buffers it while a sender element is open. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (currentTag != null) {
            // Only append here: a single text node may arrive in multiple calls.
            text.append(ch, start, length);
        }
    }

    /** Called for every closing tag; stores the captured text and finishes records. */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (currentTag != null && qName.equals(currentTag)) {
            currentValue = text.toString();
            // Ignore values that consist solely of whitespace.
            if (!currentValue.trim().equals("")) {
                set.add(currentValue);
                if (map != null) {
                    map.put(currentTag, currentValue);
                }
                System.out.println(currentTag + ": " + currentValue);
            }
            // The element is done; reset for the next one.
            currentTag = null;
            currentValue = null;
            text.setLength(0);
        }
        if (map != null && qName.equals(nodeName)) {
            // The record is complete.
            list.add(map);
            map = null;
        }
    }

    /** Called once at the end of the document. */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("--endDocument()--");
        super.endDocument();
    }
}

三.解析excel

使用jxl库，其使用还是比较简单的（代码可能不能直接用，里面有些entity类需要自己建，有些list也需要自己处理）。

public class WriteToExcelUtils {private static final int MAXROWS = 60000;private static int INDEX = 0;public static void writeExcel(String fileName,List<Map<String, String>> list, String excel) {String excelName = excel;String sheetName = list.get(0).get("dateTime");//System.out.println("表名："+sheetName);int startLine = 0;int rows = list.size();Workbook wb = null;WritableWorkbook wwb = null;boolean flag = false;try {File is = new File(fileName+"\\"+excelName+".xls");if (!is.exists()) {System.out.println("新的excel："+excelName+".xls");wwb = Workbook.createWorkbook(is);flag = true;} else {wb = Workbook.getWorkbook(is);Sheet sheet = wb.getSheet(sheetName);if (sheet!=null) {// 获取行int length = sheet.getRows();startLine = length;}else {//System.out.println(sheetName+"不存在");flag = true;}// 首先要使用Workbook类的工厂方法创建一个可写入的工作薄(Workbook)对象// wwb = Workbook.createWorkbook(new File(fileName));wwb = Workbook.createWorkbook(is, wb);}} catch (Exception e) {e.printStackTrace();}if (wwb != null) {// 创建一个可写入的工作表// Workbook的createSheet方法有两个参数，第一个是工作表的名称，第二个是工作表在工作薄中的位置// System.out.println(index);WritableSheet ws = null;if (flag) {System.out.println("创建新的sheet: "+sheetName);ws = wwb.createSheet(sheetName, 0);} else{//System.out.println(sheetName);ws = wwb.getSheet(0);}// 下面开始添加单元格for (int i = 0; i < rows; i++) {Vehicle vh = SaxService.setVehicle(list.get(i));String dte = vh.getDate();String longitude = vh.getLongitude();String latitude = vh.getLatitude();List<String> items = new ArrayList<String>();items.add(dte);items.add(longitude);items.add(latitude);for (int j = 0; j < 3; j++) {write2Cell(ws, j, i + startLine, items.get(j));}}//index += rows;try {// 从内存中写入文件中wwb.write();// 关闭资源，释放内存wwb.close();} catch (IOException e) {e.printStackTrace();} catch (WriteException e) {e.printStackTrace();}}}public static void write2Cell(WritableSheet ws, int c, int r, String item) {// System.out.println(index+r);// 这里需要注意的是，在Excel中，第一个参数表示列，第二个表示行Label labelC = new Label(c, r, item);try {// 
将生成的单元格添加到工作表中ws.addCell(labelC);} catch (RowsExceededException e) {e.printStackTrace();} catch (WriteException e) {e.printStackTrace();}}

<pre name="code" class="java">/** * 读Excel *  * @param pathname */public static LinkedList<HashMap<String, String>> readExcel(String pathname) {LinkedList<HashMap<String, String>> list = new LinkedList<HashMap<String, String>>();File file = new File(pathname);String uri = "f:\\xml\\map\\edge.xml";Workbook wb = null;try {wb = Workbook.getWorkbook(file);Sheet sheet = wb.getSheet(0);int rows = sheet.getRows();// ThreadPoolExecutorTest threadPool = new// ThreadPoolExecutorTest(queue);for (int i = 0; i < rows; i++) {HashMap<String, String> map = new HashMap<String, String>();// System.out.println("第"+(i+1)+"条数据正在执行");Cell[] cols = sheet.getRow(i);String sender = cols[0].getContents();String nodeID = cols[1].getContents();String wayID = cols[2].getContents();String depart = cols[3].getContents();String edgeID = SaxService.getWayIdFromNodeXML(uri, nodeID, wayID);map.put("sender", sender);map.put("nodeID", nodeID);map.put("wayID", wayID);map.put("edgeID", edgeID);map.put("depart", depart);list.add(map);}} catch (Exception e) {e.printStackTrace();}return list;}

0 0