import java.io.File; import java.io.FileInputStream; import java.io.IOException; import org.apache.poi.POITextExtractor; import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.hwpf.extractor.Word6Extractor; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.xmlbeans.XmlException; public class ReadWord { public static void main(String[] args) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { FileInputStream fileInputStream = new FileInputStream(new File( "C:\\Users\\CY\\Desktop\\a.docx")); POITextExtractor extractor = ExtractorFactory .createExtractor(fileInputStream); if (extractor instanceof Word6Extractor) { // Word 95 Word6Extractor extractor2 = (Word6Extractor) extractor; System.out.println(extractor2.getText()); } else if (extractor instanceof WordExtractor) { // Word 2003 WordExtractor extractor2 = (WordExtractor) extractor; System.out.println(extractor2.getText()); } else if (extractor instanceof XWPFWordExtractor) { // Word 2007 XWPFWordExtractor extractor2 = (XWPFWordExtractor) extractor; System.out.println(extractor2.getText()); } } }
參考資料:
http://poi.apache.org/
http://poi.apache.org/hwpf/
http://blog.changyy.org/2012/04/java-apache-poi-wordexcelpowerpoint.html
http://poi.apache.org/text-extraction.html