001: /*
002: * Milic Djordje
003: * djordje.milic@gmail.com
004: */
005:
006: package org.enhydra.snapper.parsers.fileparsers.parsers.wordparser2007parser;
007:
008: import java.io.File;
009: import java.io.IOException;
010: import java.io.InputStream;
011: import java.io.StringWriter;
012: import java.util.ArrayList;
013: import java.util.zip.ZipEntry;
014: import java.util.zip.ZipFile;
015: import java.util.zip.ZipInputStream;
016:
017: import javax.xml.parsers.DocumentBuilder;
018: import javax.xml.parsers.DocumentBuilderFactory;
019: import javax.xml.parsers.ParserConfigurationException;
020: import javax.xml.transform.Transformer;
021: import javax.xml.transform.TransformerConfigurationException;
022: import javax.xml.transform.TransformerException;
023: import javax.xml.transform.TransformerFactory;
024: import javax.xml.transform.dom.DOMSource;
025: import javax.xml.transform.stream.StreamResult;
026: import javax.xml.transform.stream.StreamSource;
027:
028: import org.enhydra.snapper.parsers.fileparsers.FileParserException;
029: import org.w3c.dom.Document;
030: import org.w3c.dom.Element;
031: import org.w3c.dom.Node;
032: import org.w3c.dom.NodeList;
033: import org.xml.sax.SAXException;
034:
035: public class Word2007ToText implements Word2007Converter {
036:
037: private File originalFile;
038:
039: private String parsedText = "";
040:
041: private ArrayList<String> customProperties;
042:
043: public static InputStream xslFile = null;
044:
045: StringWriter resultFile;
046:
047: public InputStream xmlFile;
048:
049: ZipInputStream instream = null;
050:
051: ZipFile infile = null;
052:
053: ZipEntry entry = null;
054:
055: Transformer transformer = null;
056:
057: public void init(File originalFile, int characterLimit,
058: String conversionPath) throws FileParserException {
059: this .originalFile = originalFile;
060: }
061:
062: public String parse(int limit) throws FileParserException {
063:
064: entry = infile.getEntry("docProps/custom.xml");
065: if (entry != null)
066: customProperties = getProperties(entry);
067:
068: entry = (infile.getEntry("word/document.xml"));
069:
070: if ((entry.getName().equals("word/document.xml"))) {
071: resultFile = new StringWriter();
072:
073: StreamResult result = new StreamResult(resultFile);
074:
075: DOMSource xmlSource = null;
076: DocumentBuilderFactory dbf = DocumentBuilderFactory
077: .newInstance();
078: dbf.setNamespaceAware(true);
079: DocumentBuilder db = null;
080: try {
081: db = dbf.newDocumentBuilder();
082: } catch (ParserConfigurationException e1) {
083: // TODO Auto-generated catch block
084: e1.printStackTrace();
085: }
086:
087: Node doc = null;
088: try {
089: doc = db.parse(infile.getInputStream(entry));
090: xmlSource = new DOMSource(doc);
091:
092: } catch (Exception ex) {
093: ex.printStackTrace();
094: }
095:
096: try {
097: transformer.transform(xmlSource, result);
098: doc = null;
099: xmlSource = null;
100: result = null;
101: } catch (TransformerException e) {
102: // TODO Auto-generated catch block
103: e.printStackTrace();
104: }
105:
106: parsedText = resultFile.toString();
107: }
108:
109: return (parsedText != null) ? parsedText : "";
110: }
111:
112: public void close() {
113:
114: resultFile = null;
115: try {
116: if (xmlFile != null)
117: xmlFile.close();
118: if (instream != null)
119: instream.close();
120: if (infile != null)
121: infile.close();
122: } catch (IOException e) {
123: // TODO Auto-generated catch block
124: System.out
125: .println("Word2007ToText-Unable to close all resources!!!");
126: }
127: }
128:
129: public String getConvertedContent() {
130: return null;
131: }
132:
133: public String getExtension() {
134: return "txt";
135: }
136:
137: public String getPureText() {
138: return parsedText;
139: }
140:
141: public void setZipFile(ZipInputStream stream, ZipFile file) {
142: xmlFile = stream;
143: infile = file;
144: }
145:
146: public void setEntry(ZipEntry zEntry) {
147: entry = zEntry;
148: }
149:
150: public ArrayList<String> getCustomProperties() {
151: return customProperties;
152: }
153:
154: private ArrayList<String> getProperties(ZipEntry zEntry) {
155: String temp = "";
156:
157: DocumentBuilderFactory dbf = DocumentBuilderFactory
158: .newInstance();
159: dbf.setNamespaceAware(false);
160: DocumentBuilder db = null;
161: Document doc = null;
162: NodeList nList = null;
163: String name = "";
164: String value = "";
165: ArrayList<String> parts = new ArrayList<String>();
166:
167: try {
168: db = dbf.newDocumentBuilder();
169: } catch (ParserConfigurationException e1) {
170: // TODO Auto-generated catch block
171: e1.printStackTrace();
172: }
173:
174: try {
175: doc = db.parse(infile.getInputStream(zEntry));
176: } catch (SAXException e) {
177: // TODO Auto-generated catch block
178: e.printStackTrace();
179: } catch (IOException e) {
180: // TODO Auto-generated catch block
181: e.printStackTrace();
182: }
183:
184: Element root = doc.getDocumentElement();
185: if (root.getNodeName().equalsIgnoreCase("Properties"))
186: nList = root.getElementsByTagName("property");
187: if (nList != null) {
188: for (int i = 0; i < nList.getLength(); i++) {
189: Element tempNode = (Element) nList.item(i);
190: name = tempNode.getAttribute("name");
191: Node temp2 = tempNode.getFirstChild().getFirstChild();
192: if (temp2 != null)
193: value = temp2.getNodeValue();
194: temp = name + " = " + value;
195: parts.add(temp);
196: }
197: }
198:
199: return parts;
200: }
201:
202: public Transformer getTransformer() {
203: return transformer;
204: }
205:
206: public void setTransformer(Transformer transformer) {
207: this .transformer = transformer;
208: }
209:
210: public void setTextTransformer(Transformer transformer) {
211: // TODO Auto-generated method stub
212:
213: }
214: }
|