本篇文章主要通过实例代码介绍了java读取pdf、word文档,需要的朋友可以参考下
读取pdf文件jar引用
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>1.8.13</version>
</dependency>
读取word文件jar引用
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.16-beta1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.16-beta1</version>
</dependency>
读取word文件方法
/**
 * Reads the text content of a Word document.
 *
 * <p>Extraction is done with Apache POI's {@code WordExtractor}. I/O failures
 * (including a missing file) are not propagated: the stack trace is printed
 * and {@code null} is returned, so callers must check the result.
 *
 * <p>NOTE(review): {@code WordExtractor} belongs to POI's HWPF module, which
 * handles the binary {@code .doc} format; {@code .docx} input is presumably
 * rejected with a runtime exception — confirm before feeding it OOXML files.
 *
 * @param filepath path of the Word file to read
 * @return the extracted text, or {@code null} if the file could not be
 *         opened or read
 */
public static String gettextfromword(String filepath) {
    String result = null;
    // Resources are closed in reverse order: the extractor first, then the
    // stream it was reading from.
    try (FileInputStream fis = new FileInputStream(filepath);
         WordExtractor wordExtractor = new WordExtractor(fis)) {
        result = wordExtractor.getText();
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so one handler covers
        // both "cannot open" and "cannot read". Best-effort contract:
        // report and fall through to return null.
        e.printStackTrace();
    }
    return result;
}
读取pdf文件方法
/**
 * Reads the text content of a PDF file.
 *
 * <p>The file is parsed with PDFBox's {@code PDFParser} and the text of the
 * resulting document is extracted with {@code PDFTextStripper}. I/O failures
 * (including a missing file) are not propagated: the stack trace is printed
 * and {@code null} is returned, so callers must check the result.
 *
 * @param filepath path of the PDF file to read
 * @return the extracted text, or {@code null} if the file could not be
 *         opened, parsed or read
 */
public static String gettextfrompdf(String filepath) {
    String result = null;
    try (FileInputStream is = new FileInputStream(filepath)) {
        PDFParser parser = new PDFParser(is);
        parser.parse();
        // The document is only available after a successful parse; scope it
        // in its own try so it is closed before the stream that backs it.
        try (PDDocument document = parser.getPDDocument()) {
            PDFTextStripper stripper = new PDFTextStripper();
            result = stripper.getText(document);
        }
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so one handler covers
        // open, parse and extraction failures. Best-effort contract:
        // report and fall through to return null.
        e.printStackTrace();
    }
    return result;
}
以上就是详解java读取pdf、word文档的方法的详细内容。