一:Java 实现将 Word 转换为 HTML
1:引入依赖
<!-- XDocReport: DOCX (XWPF) to XHTML conversion -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<!-- Apache POI core plus the scratchpad module (HWPF, used for legacy .doc files) -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.12</version>
</dependency>
2:代码demo
1 package com.svse.controller; 2 3 import javax.xml.parsers.documentbuilderfactory; 4 import javax.xml.parsers.parserconfigurationexception; 5 import javax.xml.transform.outputkeys; 6 import javax.xml.transform.transformer; 7 import javax.xml.transform.transformerexception; 8 import javax.xml.transform.transformerfactory; 9 import javax.xml.transform.dom.domsource; 10 import javax.xml.transform.stream.streamresult; 11 12 import org.apache.poi.hwpf.hwpfdocument; 13 import org.apache.poi.hwpf.converter.picturesmanager; 14 import org.apache.poi.hwpf.converter.wordtohtmlconverter; 15 import org.apache.poi.hwpf.usermodel.picturetype; 16 import org.apache.poi.xwpf.converter.core.basicuriresolver; 17 import org.apache.poi.xwpf.converter.core.fileimageextractor; 18 import org.apache.poi.xwpf.converter.core.fileuriresolver; 19 import org.apache.poi.xwpf.converter.core.iuriresolver; 20 import org.apache.poi.xwpf.converter.core.ixwpfconverter; 21 import org.apache.poi.xwpf.converter.xhtml.xhtmlconverter; 22 import org.apache.poi.xwpf.converter.xhtml.xhtmloptions; 23 import org.apache.poi.xwpf.usermodel.xwpfdocument; 24 /** 25 * word 转换成html 26 */ 27 public class testwordtohtml { 28 29 public static final string storagepath=c://works//files//; 30 public static final string ip=192.168.30.222; 31 public static final string port=8010; 32 public static void main(string[] args) throws ioexception, transformerexception, parserconfigurationexception { 33 testwordtohtml wt=new testwordtohtml(); 34 //wt.word2003tohtml(甲骨文考证.doc); 35 wt.word2007tohtml(甲骨文考证.docx); 36 37 } 38 39 /** 40 * 2003版本word转换成html 41 * @throws ioexception 42 * @throws transformerexception 43 * @throws parserconfigurationexception 44 */ 45 public void word2003tohtml(string filename) throws ioexception, transformerexception, parserconfigurationexception { 46 47 final string imagepath = storagepath+fileimage/;//解析时候如果doc文件中有图片 图片会保存在此路径 48 final string strranstring=getrandomnum(); 49 string filepath =storagepath; 50 
string htmlname =filename.substring(0, filename.indexof(.))+ 2003.html; 51 final string file = filepath + filename; 52 inputstream input = new fileinputstream(new file(file)); 53 hwpfdocument worddocument = new hwpfdocument(input); 54 wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(documentbuilderfactory.newinstance().newdocumentbuilder().newdocument()); 55 //设置图片存放的位置 56 wordtohtmlconverter.setpicturesmanager(new picturesmanager() { 57 public string savepicture(byte[] content, picturetype picturetype, string suggestedname, float widthinches, float heightinches) { 58 file imgpath = new file(imagepath); 59 if(!imgpath.exists()){//图片目录不存在则创建 60 imgpath.mkdirs(); 61 } 62 63 file file = new file(imagepath +strranstring+suggestedname); 64 try { 65 outputstream os = new fileoutputstream(file); 66 os.write(content); 67 os.close(); 68 } catch (filenotfoundexception e) { 69 e.printstacktrace(); 70 } catch (ioexception e) { 71 e.printstacktrace(); 72 } 73 74 return http://+ip+:+port+//uploadfile/fileimage/+strranstring+suggestedname; 75 // return imagepath +strranstring+suggestedname; 76 } 77 }); 78 79 //解析word文档 80 wordtohtmlconverter.processdocument(worddocument); 81 document htmldocument = wordtohtmlconverter.getdocument(); 82 83 file htmlfile = new file(filepath +strranstring+htmlname); 84 outputstream outstream = new fileoutputstream(htmlfile); 85 86 87 domsource domsource = new domsource(htmldocument); 88 streamresult streamresult = new streamresult(outstream); 89 90 transformerfactory factory = transformerfactory.newinstance(); 91 transformer serializer = factory.newtransformer(); 92 serializer.setoutputproperty(outputkeys.encoding, utf-8); 93 serializer.setoutputproperty(outputkeys.indent, yes); 94 serializer.setoutputproperty(outputkeys.method, html); 95 96 serializer.transform(domsource, streamresult); 97 outstream.close(); 98 99 system.out.println(生成html文件路径:+ http://+ip+:+port+//uploadfile/+strranstring+htmlname);100 }101 102 /**103 * 
2007版本word转换成html104 * @throws ioexception105 */106 public void word2007tohtml(string filename) throws ioexception {107 108 final string strranstring=getrandomnum();109 110 string filepath = storagepath+strranstring;111 string htmlname =filename.substring(0, filename.indexof(.))+ 2007.html;112 file f = new file(storagepath+filename); 113 if (!f.exists()) { 114 system.out.println(sorry file does not exists!); 115 } else { 116 if (f.getname().endswith(.docx) || f.getname().endswith(.docx)) { 117 try {118 // 1) 加载word文档生成 xwpfdocument对象 119 inputstream in = new fileinputstream(f); 120 xwpfdocument document = new xwpfdocument(in); 121 122 // 2) 解析 xhtml配置 (这里设置iuriresolver来设置图片存放的目录) 123 file imagefolderfile = new file(filepath); 124 xhtmloptions options = xhtmloptions.create().uriresolver(new fileuriresolver(imagefolderfile)); 125 options.setextractor(new fileimageextractor(imagefolderfile)); 126 options.uriresolver(new iuriresolver() {127 public string resolve(string uri) {128 //http://192.168.30.222:8010//uploadfile/....129 return http://+ip+:+port+//uploadfile/+strranstring +/+ uri;130 }131 });132 133 options.setignorestylesifunused(false); 134 options.setfragment(true); 135 136 // 3) 将 xwpfdocument转换成xhtml 137 outputstream out = new fileoutputstream(new file(filepath + htmlname)); 138 ixwpfconverter<xhtmloptions> converter = xhtmlconverter.getinstance();139 converter.convert(document,out, options);140 //xhtmlconverter.getinstance().convert(document, out, options); 141 system.out.println(html路径:+http://+ip+:+port+//uploadfile/+strranstring+htmlname);142 } catch (exception e) {143 e.printstacktrace();144 }145 146 } else { 147 system.out.println(enter only ms office 2007+ files); 148 } 149 } 150 } 151 152 /**153 *功能说明:生成时间戳154 *创建人:zsq155 *创建时间:2019年12月7日 下午2:37:09156 *157 */158 public static string getrandomnum(){159 date dt = new date();160 simpledateformat sdf = new simpledateformat(yyyymmddhhmmss); 161 string str=sdf.format(dt);162 return str;163 }164 165 }
二:Java 实现将 PDF 转换为 HTML
1: 引入依赖
<!-- pdf2dom renders a PDFBox document as an HTML DOM -->
<dependency>
    <groupId>net.sf.cssbox</groupId>
    <artifactId>pdf2dom</artifactId>
    <version>1.7</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.12</version>
</dependency>
2:代码demo
1 public class pdftohtml { 2 3 /* 4 pdf转换html 5 */ 6 public void pdftohtmltest(string inpdfpath,string outputhtmlpath) { 7 // string outputpath = c:\\works\\files\\zsq保密知识测试题库.html; 8 9 //try() 写在()里面会自动关闭流10 try{11 bufferedwriter out = new bufferedwriter(new outputstreamwriter(new fileoutputstream(new file(outputhtmlpath)),utf-8));12 //加载pdf文档13 //pddocument document = pddocument.load(bytes);14 pddocument document = pddocument.load(new file(inpdfpath));15 pdfdomtree pdfdomtree = new pdfdomtree();16 pdfdomtree.writetext(document,out);17 } catch (exception e) {18 e.printstacktrace();19 }20 }21 22 public static void main(string[] args) throws ioexception {23 pdftohtml ph=new pdftohtml();24 string pdfpath=c:\\works\\files\\武研中心行政考勤制度.pdf;25 string outputpath=c:\\works\\files\\武研中心行政考勤制度.html;26 ph.pdftohtmltest(pdfpath,outputpath);27 }28 29 }
三:Java 实现将 TXT 转换为 HTML
1 /* 2 * txt文档转html 3 filepath:txt原文件路径 4 htmlposition:转化后生成的html路径 5 */ 6 public static void txttohtml(string filepath, string htmlposition) { 7 try { 8 //string encoding = gbk; 9 file file = new file(filepath);10 if (file.isfile() && file.exists()) { // 判断文件是否存在11 inputstreamreader read = new inputstreamreader(new fileinputstream(file), gbk);12 // 考虑到编码格式13 bufferedreader bufferedreader = new bufferedreader(read);14 // 写文件15 fileoutputstream fos = new fileoutputstream(new file(htmlposition));16 outputstreamwriter osw = new outputstreamwriter(fos, gbk);17 bufferedwriter bw = new bufferedwriter(osw);18 string linetxt = null;19 while ((linetxt = bufferedreader.readline()) != null) {20 bw.write(   +linetxt + </br>);21 }22 bw.close();23 osw.close();24 fos.close();25 read.close();26 } else {27 system.out.println(找不到指定的文件);28 }29 } catch (exception e) {30 system.out.println(读取文件内容出错);31 e.printstacktrace();32 }33 }
以上就是 Java 实现 Word/PDF/TXT 转 HTML 的方法的详细内容。