001: /*
002: * Milic Djordje
003: * djordje.milic@gmail.com
004: */
005:
006: package org.enhydra.snapper.parsers.fileparsers.parsers.excel2007parser;
007:
008: import java.io.*;
009: import java.util.ArrayList;
010: import java.util.zip.ZipEntry;
011: import java.util.zip.ZipFile;
012: import java.util.zip.ZipInputStream;
013:
014: import javax.xml.parsers.DocumentBuilder;
015: import javax.xml.parsers.DocumentBuilderFactory;
016: import javax.xml.parsers.ParserConfigurationException;
017: import javax.xml.transform.Transformer;
018: import javax.xml.transform.TransformerException;
019: import javax.xml.transform.TransformerFactory;
020: import javax.xml.transform.dom.DOMSource;
021: import javax.xml.transform.stream.StreamResult;
022: import javax.xml.transform.stream.StreamSource;
023:
024: import org.enhydra.snapper.parsers.fileparsers.FileParserException;
025: import org.w3c.dom.Document;
026: import org.w3c.dom.Element;
027: import org.w3c.dom.Node;
028: import org.w3c.dom.NodeList;
029: import org.xml.sax.SAXException;
030:
031: public class Excel2007ToText implements Excel2007Converter {
032:
033: private File originalFile;
034:
035: private String parsedText = "";
036:
037: public InputStream xslFile = null;
038:
039: private ArrayList<String> customProperties;
040:
041: StringWriter resultFile;
042:
043: public InputStream xmlFile;
044:
045: ZipInputStream instream = null;
046:
047: ZipFile infile = null;
048:
049: ZipEntry entry = null;
050:
051: Transformer transformer = null;
052:
053: boolean read = false;
054:
055: boolean readStrings = false;
056:
057: public Excel2007ToText() {
058: }
059:
060: public void init(File originalFile, int characterLimit,
061: String conversionPath) throws FileParserException {
062: this .originalFile = originalFile;
063: }
064:
065: public String parse(int limit) throws FileParserException {
066:
067: int t = 1;
068: try {
069: ZipEntry entry = null;
070:
071: entry = infile.getEntry("docProps/custom.xml");
072: if (entry != null)
073: customProperties = getProperties(entry);
074:
075: if (limit != 0) {
076: while ((entry = instream.getNextEntry()) != null) {
077: if (t <= limit) {
078: if (read)
079: break;
080:
081: entry = (infile.getEntry("xl/worksheets/sheet"
082: + t + ".xml"));
083: if ((readStrings == true) || (entry == null)) {
084: entry = (infile
085: .getEntry("xl/sharedStrings.xml"));
086: read = true;
087: }
088:
089: resultFile = new StringWriter();
090: t++;
091:
092: DOMSource xmlSource = null;
093: DocumentBuilderFactory dbf = DocumentBuilderFactory
094: .newInstance();
095: dbf.setNamespaceAware(false);
096: DocumentBuilder db = null;
097: try {
098: db = dbf.newDocumentBuilder();
099: } catch (ParserConfigurationException e1) {
100: // TODO Auto-generated catch block
101: e1.printStackTrace();
102: }
103: Node doc = null;
104:
105: try {
106: doc = db
107: .parse(infile.getInputStream(entry));
108: xmlSource = new DOMSource(doc);
109:
110: } catch (Exception ex) {
111: ex.printStackTrace();
112: }
113:
114: StreamResult result = new StreamResult(
115: resultFile);
116:
117: try {
118: transformer.transform(xmlSource, result);
119: doc = null;
120: xmlSource = null;
121: result = null;
122: } catch (TransformerException e) {
123: // TODO Auto-generated catch block
124: e.printStackTrace();
125: }
126:
127: if (t == limit) {
128: t--;
129: readStrings = true;
130: }
131:
132: parsedText = resultFile.toString();
133: }
134: }
135: } else {
136: while ((entry = instream.getNextEntry()) != null) {
137: if (read)
138: break;
139:
140: entry = (infile.getEntry("xl/worksheets/sheet" + t
141: + ".xml"));
142: if ((readStrings == true) || (entry == null)) {
143: entry = (infile
144: .getEntry("xl/sharedStrings.xml"));
145: read = true;
146: }
147:
148: resultFile = new StringWriter();
149: t++;
150:
151: DOMSource xmlSource = null;
152: DocumentBuilderFactory dbf = DocumentBuilderFactory
153: .newInstance();
154: dbf.setNamespaceAware(false);
155: DocumentBuilder db = null;
156: try {
157: db = dbf.newDocumentBuilder();
158: } catch (ParserConfigurationException e1) {
159: // TODO Auto-generated catch block
160: e1.printStackTrace();
161: }
162: Node doc = null;
163:
164: try {
165: doc = db.parse(infile.getInputStream(entry));
166: xmlSource = new DOMSource(doc);
167:
168: } catch (Exception ex) {
169: ex.printStackTrace();
170: }
171:
172: StreamResult result = new StreamResult(resultFile);
173:
174: try {
175: transformer.transform(xmlSource, result);
176: doc = null;
177: xmlSource = null;
178: result = null;
179: } catch (TransformerException e) {
180: // TODO Auto-generated catch block
181: e.printStackTrace();
182: }
183:
184: parsedText = resultFile.toString();
185: }
186: }
187:
188: } catch (Exception e) {
189: System.out
190: .println("Problem while parsing Excel 2007 file!");
191: }
192:
193: return (parsedText != null) ? parsedText : "";
194: }
195:
196: public void close() {
197:
198: resultFile = null;
199: try {
200: if (xmlFile != null)
201: xmlFile.close();
202: if (instream != null)
203: instream.close();
204: if (infile != null)
205: infile.close();
206: if (entry != null)
207: entry = null;
208: } catch (IOException e) {
209: // TODO Auto-generated catch block
210: System.out
211: .println("Excel2007ToText-Unable to close all resources!!!");
212: }
213: }
214:
215: public String getConvertedContent() {
216: return null;
217: }
218:
219: public String getExtension() {
220: return "txt";
221: }
222:
223: public String getPureText() {
224: return parsedText;
225: }
226:
227: public void setZipFile(InputStream stream, ZipFile file) {
228: xmlFile = stream;
229: infile = file;
230: }
231:
232: public void setZipFile(ZipInputStream stream, ZipFile file) {
233: instream = stream;
234: infile = file;
235: }
236:
237: public void setEntry(ZipEntry zEntry) {
238: entry = zEntry;
239: }
240:
241: public Transformer getTransformer() {
242: return transformer;
243: }
244:
245: public void setTransformer(Transformer transformer) {
246: this .transformer = transformer;
247: }
248:
249: public ArrayList<String> getCustomProperties() {
250: return customProperties;
251: }
252:
253: private ArrayList<String> getProperties(ZipEntry zEntry) {
254: String temp = "";
255:
256: DocumentBuilderFactory dbf = DocumentBuilderFactory
257: .newInstance();
258: dbf.setNamespaceAware(false);
259: DocumentBuilder db = null;
260: Document doc = null;
261: NodeList nList = null;
262: String name = "";
263: String value = "";
264: ArrayList<String> parts = new ArrayList<String>();
265:
266: try {
267: db = dbf.newDocumentBuilder();
268: } catch (ParserConfigurationException e1) {
269: // TODO Auto-generated catch block
270: e1.printStackTrace();
271: }
272:
273: try {
274: doc = db.parse(infile.getInputStream(zEntry));
275: } catch (SAXException e) {
276: // TODO Auto-generated catch block
277: e.printStackTrace();
278: } catch (IOException e) {
279: // TODO Auto-generated catch block
280: e.printStackTrace();
281: }
282:
283: Element root = doc.getDocumentElement();
284: if (root.getNodeName().equalsIgnoreCase("Properties"))
285: nList = root.getElementsByTagName("property");
286: if (nList != null) {
287: for (int i = 0; i < nList.getLength(); i++) {
288: Element tempNode = (Element) nList.item(i);
289: name = tempNode.getAttribute("name");
290: Node temp2 = tempNode.getFirstChild().getFirstChild();
291: if (temp2 != null)
292: value = temp2.getNodeValue();
293: temp = name + " = " + value;
294: parts.add(temp);
295: }
296: }
297:
298: return parts;
299: }
300:
301: public void setTextTransformer(Transformer transformer) {
302: // TODO Auto-generated method stub
303:
304: }
305: }
|