import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
/**
 * Command-line demo that prints the plain text of a Word document using
 * Apache POI's {@link ExtractorFactory}.
 *
 * <p>The document path is taken from the first command-line argument; when no
 * argument is supplied, {@link #DEFAULT_DOCUMENT_PATH} is used.
 */
public class ReadWord {

    /** Document opened when no path is passed on the command line. */
    private static final String DEFAULT_DOCUMENT_PATH = "C:\\Users\\CY\\Desktop\\a.docx";

    /**
     * Opens the document, lets {@link ExtractorFactory} choose an extractor for
     * it, and prints the extracted text to standard output when the extractor
     * is one of the Word extractors. For any other extractor type nothing is
     * printed.
     *
     * @param args optional; {@code args[0]} is the path of the document to read
     * @throws IOException            if the file cannot be opened or read
     * @throws InvalidFormatException propagated from the extractor factory
     * @throws OpenXML4JException     propagated from the extractor factory
     * @throws XmlException           propagated from the extractor factory
     */
    public static void main(String[] args) throws IOException,
            InvalidFormatException, OpenXML4JException, XmlException {
        String documentPath = (args != null && args.length > 0)
                ? args[0]
                : DEFAULT_DOCUMENT_PATH;

        // try-with-resources guarantees the stream is closed even when
        // extractor creation or text extraction throws.
        try (FileInputStream fileInputStream = new FileInputStream(new File(documentPath))) {
            POITextExtractor extractor = ExtractorFactory.createExtractor(fileInputStream);
            // NOTE(review): depending on the POI version in use, the extractor
            // itself may also be closeable - confirm and close it here if so.
            if (isWordExtractor(extractor)) {
                // getText() is declared on POITextExtractor, so no downcast is
                // needed to read the text from any of the Word extractors.
                System.out.println(extractor.getText());
            }
        }
    }

    /** Returns whether the extractor handles one of the Word formats (Word 95, Word 2003, Word 2007). */
    private static boolean isWordExtractor(POITextExtractor extractor) {
        return extractor instanceof Word6Extractor      // Word 95
                || extractor instanceof WordExtractor   // Word 2003
                || extractor instanceof XWPFWordExtractor; // Word 2007
    }
}
// References (參考資料):
// http://poi.apache.org/
// http://poi.apache.org/hwpf/
// http://blog.changyy.org/2012/04/java-apache-poi-wordexcelpowerpoint.html
// http://poi.apache.org/text-extraction.html


