epublib 按指定层级拆分电子书
来源:互联网 发布:会计证模拟考试软件 编辑:程序博客网 时间:2024/06/10 14:39
import java.io.File;import java.io.FileOutputStream;import java.util.ArrayList;import java.util.Arrays;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Set;import org.w3c.dom.Document;import org.w3c.dom.Element;import org.w3c.dom.NodeList;import nl.siegmann.epublib.domain.Book;import nl.siegmann.epublib.domain.MediaType;import nl.siegmann.epublib.domain.Resource;import nl.siegmann.epublib.domain.TOCReference;import nl.siegmann.epublib.domain.TableOfContents;import nl.siegmann.epublib.epub.EpubReader;import nl.siegmann.epublib.epub.EpubWriter;import nl.siegmann.epublib.service.MediatypeService;import nl.siegmann.epublib.util.ResourceUtil;public class EpubRead {static List<TOCReference> tbList = new ArrayList<TOCReference>();@SuppressWarnings("rawtypes")public static void main(String[] args) {EpubReader epubReader = new EpubReader();try {MediaType[] lazyTypes = { MediatypeService.CSS, MediatypeService.GIF, MediatypeService.JPG,MediatypeService.PNG };String fileName = "C:\\TEMP\\sc.epub";Book book = epubReader.readEpubLazy(fileName, "UTF-8", Arrays.asList(lazyTypes));TableOfContents tableOfContents = book.getTableOfContents();List<TOCReference> refs = tableOfContents.getTocReferences();for (TOCReference ref : refs) {getNode(ref, 0);}for (TOCReference ref : tbList) {List<TOCReference> tocList = ref.getChildren();Book b = new Book();b.getMetadata().addTitle(ref.getTitle());// 封面从带图片的网页中获取Document document = ResourceUtil.getAsDocument(ref.getResource());NodeList nodeList = document.getElementsByTagName("img");Element element1 = (Element) nodeList.item(0);String coverHref = element1.getAttribute("src");coverHref = coverHref.substring(3, coverHref.length());Resource resource1 = book.getResources().getByHref(coverHref);b.setCoverImage(resource1);for (TOCReference tocReference : tocList) {// 将指定document中的内容替换Resource r = tocReference.getResource();String s = new String(r.getData());if ("目录".equals(tocReference.getTitle())) {s = s.replaceAll("<a href=\"part0001.xhtml\">返回总目录</a>", "");Resource rr = new Resource(s.getBytes(), r.getHref());b.addSection(tocReference.getTitle(), rr);System.out.println("------\n" + new String(rr.getData()));} else {b.addSection(tocReference.getTitle(), r);}Document doc = ResourceUtil.getAsDocument(r);Set<String> cssSet = new HashSet<String>();Set<String> imgSet = new HashSet<String>();NodeList cssList = doc.getElementsByTagName("link");NodeList imgList = doc.getElementsByTagName("img");for (int i = 0; i < cssList.getLength(); i++) {Element element = (Element) cssList.item(i);String href = element.getAttribute("href");href = href.substring(3, href.length());cssSet.add(href);}for (int i = 0; i < imgList.getLength(); i++) {Element element = (Element) imgList.item(i);String href = element.getAttribute("src");href = href.substring(3, href.length());imgSet.add(href);}for (Iterator iterator = cssSet.iterator(); iterator.hasNext();) {String href = (String) iterator.next();Resource resource = book.getResources().getByHref(href);b.addResource(resource);}for (Iterator iterator = imgSet.iterator(); iterator.hasNext();) {String href = (String) iterator.next();Resource resource = book.getResources().getByHref(href);b.addResource(resource);}}EpubWriter epubWriter = new EpubWriter();epubWriter.write(b, new FileOutputStream(new File("C:\\TEMP\\output\\" + ref.getTitle() + ".epub")));}} catch (Exception e) {e.printStackTrace();} finally {}}public static void getNode(TOCReference toc, Integer level) {List<TOCReference> tocList = toc.getChildren();if (tocList != null && tocList.size() > 0) {// 判断是否查到,没有了表示小的了if (level == 2) {tbList.add(toc);}for (int i = 0; i < tocList.size(); i++) {getNode(tocList.get(i), level + 1);}}}}
阅读全文
0 0
- epublib 按指定层级拆分电子书
- C++ 按指定分隔符拆分字符串
- java按指定时间拆分月份
- extjs tree 展开指定层级
- split()方法是将指定字符串按某指定的分隔符进行拆分
- 文件夹按层级打印
- 文件按层级打印
- sql按层级汇总
- 字符串拆分,根据指定分隔符拆分字符串
- 文本文件存在字符串数组,再按指定行截取,可以用来拆分文件。打印显示效果
- 输入M个字符串,请按指定长度N拆分每个字符串
- 从指定节点处拆分二叉树
- componentsSeparatedByCharactersInSet简单代码拆分指定字符
- List拆分成指定大小的subList
- 根据指定字符集拆分任意字符串
- 根据指定行数拆分内表
- 分隔list 按照指定的个数拆分
- 使用Epublib处理epub文件
- p3763DNA,倍增+hash
- python 获取对象信息的方法
- (三)Managing the Activity Lifecycle管理activity的生命周期
- spring+springmvc+mybatis框架注解版搭建
- java基础之谈谈==和equals
- epublib 按指定层级拆分电子书
- Tensorflow学习: 自编码器Tensorflow代码
- 在Apache2环境下安装SSL证书,利用301将http定向到https
- http
- linux的高级网络配置
- ardupilot如何读取传感器数据
- 深度学习Flappy Bird
- 用 CSS 实现元素垂直居中,有哪些好的方案?
- 矩阵变换中等距、相似、仿射和投影变换的小结