2013/11/12

Java: Using Apache POI to Read a Word File


import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;

/**
 * Minimal example that extracts the plain text of a Word document with
 * Apache POI and prints it to standard output.
 *
 * <p>{@link ExtractorFactory} picks the extractor implementation from the
 * content of the stream; text is printed only when that extractor is one of
 * the Word extractors ({@link Word6Extractor}, {@link WordExtractor} or
 * {@link XWPFWordExtractor}).
 */
public class ReadWord {
    /** Document that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "C:\\Users\\CY\\Desktop\\a.docx";

    /**
     * Reads a Word document and prints its text.
     *
     * @param args optional; {@code args[0]} is the path of the document to
     *             read. When absent, {@link #DEFAULT_PATH} is used.
     * @throws IOException            if the file cannot be opened, read or closed
     * @throws InvalidFormatException propagated from {@code ExtractorFactory.createExtractor}
     * @throws OpenXML4JException     propagated from {@code ExtractorFactory.createExtractor}
     * @throws XmlException           propagated from {@code ExtractorFactory.createExtractor}
     */
    public static void main(String[] args) throws IOException,
            InvalidFormatException, OpenXML4JException, XmlException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        FileInputStream fileInputStream = new FileInputStream(new File(path));
        // try/finally (rather than try-with-resources) keeps the example
        // compilable on pre-Java-7 toolchains while still guaranteeing the
        // stream is released even if extraction fails.
        try {
            POITextExtractor extractor = ExtractorFactory
                    .createExtractor(fileInputStream);
            if (isWordExtractor(extractor)) {
                // getText() is declared on POITextExtractor, so no downcast
                // is needed; dynamic dispatch selects the right implementation.
                System.out.println(extractor.getText());
            } else {
                // Previously this case produced no output at all.
                System.err.println("Not a Word document: " + path);
            }
        } finally {
            fileInputStream.close();
        }
    }

    /**
     * Tells whether the extractor is one of the Word text extractors:
     * {@link Word6Extractor} (Word 95), {@link WordExtractor} (Word 2003) or
     * {@link XWPFWordExtractor} (Word 2007).
     */
    private static boolean isWordExtractor(POITextExtractor extractor) {
        return extractor instanceof Word6Extractor
                || extractor instanceof WordExtractor
                || extractor instanceof XWPFWordExtractor;
    }
}







參考資料:
http://poi.apache.org/
http://poi.apache.org/hwpf/
http://blog.changyy.org/2012/04/java-apache-poi-wordexcelpowerpoint.html
http://poi.apache.org/text-extraction.html