利用 POI 将 Word 转换为 HTML,以便在浏览器上预览
package com.vito.demo.test;import java.io.bytearrayoutputstream;import java.io.file;import java.io.fileinputstream;import java.io.filenotfoundexception;import java.io.fileoutputstream;import java.io.inputstream;import java.util.list;import javax.xml.parsers.documentbuilderfactory;import javax.xml.transform.outputkeys;import javax.xml.transform.transformer;import javax.xml.transform.transformerfactory;import javax.xml.transform.dom.domsource;import javax.xml.transform.stream.streamresult;import org.apache.commons.io.fileutils;import org.apache.poi.hwpf.hwpfdocument;import org.apache.poi.hwpf.converter.picturesmanager;import org.apache.poi.hwpf.converter.wordtohtmlconverter;import org.apache.poi.hwpf.usermodel.picture;import org.apache.poi.hwpf.usermodel.picturetype;import org.w3c.dom.document;public class poiwordtohtml { public static void main(string[] args) throws throwable { final string path = g:\\doc\\; final string file = 客户需求文档.doc; inputstream input = new fileinputstream(path + file); hwpfdocument worddocument = new hwpfdocument(input); wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter( documentbuilderfactory.newinstance().newdocumentbuilder() .newdocument()); wordtohtmlconverter.setpicturesmanager(new picturesmanager() { public string savepicture(byte[] content, picturetype picturetype, string suggestedname, float widthinches, float heightinches) { return suggestedname; } }); wordtohtmlconverter.processdocument(worddocument); list pics = worddocument.getpicturestable().getallpictures(); if (pics != null) { for (int i = 0; i < pics.size(); i++) { picture pic = (picture) pics.get(i); try { pic.writeimagecontent(new fileoutputstream(path + pic.suggestfullfilename())); } catch (filenotfoundexception e) { e.printstacktrace(); } } } document htmldocument = wordtohtmlconverter.getdocument(); bytearrayoutputstream outstream = new bytearrayoutputstream(); domsource domsource = new domsource(htmldocument); streamresult streamresult = new 
streamresult(outstream); transformerfactory tf = transformerfactory.newinstance(); transformer serializer = tf.newtransformer(); serializer.setoutputproperty(outputkeys.encoding, utf-8); serializer.setoutputproperty(outputkeys.indent, yes); serializer.setoutputproperty(outputkeys.method, html); serializer.transform(domsource, streamresult); outstream.close(); string content = new string(outstream.tobytearray()); fileutils.write(new file(path, 1.html), content, utf-8); }}
相关参考链接:
JSP 实现 Word 文档的上传、在线预览、下载
Java + FlexPaper + SWFTools 仿文库文档在线阅读
将 HTML 文本写入到 Word 文件
public void htmltoword2() throws exception { inputstream bodyis = new fileinputstream(f:\\1.html); inputstream cssis = new fileinputstream(f:\\1.css); string body = this.getcontent(bodyis); string css = this.getcontent(cssis); //拼一个标准的html格式文档 string content = + body + ; inputstream is = new bytearrayinputstream(content.getbytes(gbk)); outputstream os = new fileoutputstream(f:\\1.doc); this.inputstreamtoword(is, os); } /** * 把is写入到对应的word输出流os中 * 不考虑异常的捕获,直接抛出 * @param is * @param os * @throws ioexception */ private void inputstreamtoword(inputstream is, outputstream os) throws ioexception { poifsfilesystem fs = new poifsfilesystem(); //对应于org.apache.poi.hdf.extractor.worddocument fs.createdocument(is, worddocument); fs.writefilesystem(os); os.close(); is.close(); } /** * 把输入流里面的内容以utf-8编码当文本取出。 * 不考虑异常,直接抛出 * @param ises * @return * @throws ioexception */ private string getcontent(inputstream... ises) throws ioexception { if (ises != null) { stringbuilder result = new stringbuilder(); bufferedreader br; string line; for (inputstream is : ises) { br = new bufferedreader(new inputstreamreader(is, utf-8)); while ((line=br.readline()) != null) { result.append(line); } } return result.tostring(); } return null; }