本文共 6056 字,大约阅读时间需要 20 分钟。
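需要的 Maven 依赖如下(已经过编译验证,各依赖的版本必须相互匹配):
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-io</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>
(注意:上面 poi-ooxml 的版本是 3.14,而 poi 和 poi-scratchpad 是 3.17,与"版本必须一致"的要求不符,使用前请核对实际可用的版本组合。)
package com.zyhao.openec.excel.utils;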
import java.io.BufferedWriter;
import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStreamWriter;import java.util.List;import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.PictureType;import org.apache.poi.xwpf.converter.core.FileImageExtractor;import org.apache.poi.xwpf.converter.core.IURIResolver;import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;import org.apache.poi.xwpf.usermodel.XWPFDocument;import org.w3c.dom.Document;public class WordToHtml {
public static void main(String[] args) { try { wordToHtml("E:\me\2.docx", "E:\me\", "123.html");// wordToHtml("E:\me\2.doc", "E:\me\", "12.html");} catch (Exception e) { e.printStackTrace();}}public static void wordToHtml(String wordPath,String htmlPath,String newFilename) throws Exception { convert2Html(wordPath, htmlPath, newFilename);} public static void writeFile(String content, String path) throws Exception { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos)); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } /** * 将word转换成html * 支持 .doc and .docx * @param fileName word文件名 * @param outPutFilePath html存储路径 * @param newFileName html名 * @throws Exception */public static void convert2Html(String fileName, String outPutFilePath,String newFileName) throws Exception { String substring = fileName.substring(fileName.lastIndexOf(".")+1); ByteArrayOutputStream out = new ByteArrayOutputStream(); /** * word2007和word2003的构建方式不同, * 前者的构建方式是xml,后者的构建方式是dom树。 * 文件的后缀也不同,前者后缀为.docx,后者后缀为.doc * 相应的,apache.poi提供了不同的实现类。 */ if("docx".equals(substring)){
// writeFile(new String("<html><head> <meta http-equiv=\"content-type\" content=\"text/html\" charset=\"utf-8\"/></head>对不起,.docx格式的word文档,暂时不能生成预览</html>".getBytes("utf-8")), outPutFilePath+newFileName);
//step 1 : load DOCX into XWPFDocument InputStream inputStream = new FileInputStream(new File(fileName)); XWPFDocument document = new XWPFDocument(inputStream); //step 2 : prepare XHTML options final String imageUrl = ""; XHTMLOptions options = XHTMLOptions.create(); options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl))); options.setIgnoreStylesIfUnused(false); options.setFragment(true); options.URIResolver(new IURIResolver() {
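// @Override is intentionally left off this overriding method: the original author got a compile error when adding it. TODO(review): find the cause — possibly the compiler source level is 1.5, which rejects @Override on interface methods.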
public String resolve(String uri) { return imageUrl + uri;}});//step 3 : convert XWPFDocument to XHTML XHTMLConverter.getInstance().convert(document, out, options); }else{ HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument()); wordToHtmlConverter.setPicturesManager( new PicturesManager() { public String savePicture( byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches ) { return suggestedName; } } ); wordToHtmlConverter.processDocument(wordDocument); //save pictures List pics=wordDocument.getPicturesTable().getAllPictures(); if(pics!=null&&!pics.isEmpty()){ for(int i=0;i
}
转载于:https://blog.51cto.com/17099933344/2087457