001: /*
002: * Milic Djordje
003: * djordje.milic@gmail.com
004: */
005:
006: package org.enhydra.snapper.parsers.fileparsers.parsers.powerpoint2007parser;
007:
008: import java.io.*;
009: import java.util.ArrayList;
010: import java.util.zip.ZipEntry;
011: import java.util.zip.ZipFile;
012: import java.util.zip.ZipInputStream;
013:
014: import javax.xml.parsers.DocumentBuilder;
015: import javax.xml.parsers.DocumentBuilderFactory;
016: import javax.xml.parsers.ParserConfigurationException;
017: import javax.xml.transform.Transformer;
018: import javax.xml.transform.TransformerException;
019: import javax.xml.transform.TransformerFactory;
020: import javax.xml.transform.dom.DOMSource;
021: import javax.xml.transform.stream.StreamResult;
022: import javax.xml.transform.stream.StreamSource;
023:
024: import org.enhydra.snapper.parsers.fileparsers.FileParserException;
025: import org.w3c.dom.Node;
026:
027: public class PowerPoint2007ToText implements PowerPoint2007Converter {
028:
029: private File originalFile;
030:
031: private String parsedText = "";
032:
033: StringWriter resultFile;
034:
035: ZipInputStream instream = null;
036:
037: ZipFile infile = null;
038:
039: ZipEntry entry = null;
040:
041: DOMSource xmlSource = null;
042:
043: Node doc = null;
044:
045: Transformer transformer = null;
046:
047: public PowerPoint2007ToText() {
048: }
049:
050: public void init(File originalFile, int characterLimit,
051: String conversionPath) throws FileParserException {
052: this .originalFile = originalFile;
053: }
054:
055: public String parse(int limit) throws FileParserException {
056:
057: try {
058: if (limit != 0) {
059: for (int t = 1; t <= limit; t++) {
060: entry = (infile.getEntry("ppt/slides/slide" + t
061: + ".xml"));
062: if (entry == null)
063: break;
064: resultFile = new StringWriter();
065:
066: DocumentBuilderFactory dbf = DocumentBuilderFactory
067: .newInstance();
068: dbf.setNamespaceAware(true);
069: DocumentBuilder db = null;
070: try {
071: db = dbf.newDocumentBuilder();
072: } catch (ParserConfigurationException e1) {
073: // TODO Auto-generated catch block
074: e1.printStackTrace();
075: }
076:
077: try {
078: doc = db.parse(infile.getInputStream(entry));
079: xmlSource = new DOMSource(doc);
080:
081: } catch (Exception ex) {
082: ex.printStackTrace();
083: }
084:
085: StreamResult result = new StreamResult(resultFile);
086:
087: try {
088: transformer.transform(xmlSource, result);
089: doc = null;
090: xmlSource = null;
091: result = null;
092: } catch (TransformerException e) {
093: // TODO Auto-generated catch block
094: e.printStackTrace();
095: }
096:
097: parsedText = parsedText + resultFile.toString();
098: }
099: } else {
100: int t = 1;
101: while ((entry = instream.getNextEntry()) != null) {
102: entry = (infile.getEntry("ppt/slides/slide" + t
103: + ".xml"));
104: if (entry == null)
105: break;
106: resultFile = new StringWriter();
107: t++;
108:
109: DocumentBuilderFactory dbf = DocumentBuilderFactory
110: .newInstance();
111: dbf.setNamespaceAware(true);
112: DocumentBuilder db = null;
113: try {
114: db = dbf.newDocumentBuilder();
115: } catch (ParserConfigurationException e1) {
116: // TODO Auto-generated catch block
117: e1.printStackTrace();
118: }
119:
120: try {
121: doc = db.parse(infile.getInputStream(entry));
122: xmlSource = new DOMSource(doc);
123:
124: } catch (Exception ex) {
125: ex.printStackTrace();
126: }
127:
128: StreamResult result = new StreamResult(resultFile);
129:
130: try {
131: transformer.transform(xmlSource, result);
132: doc = null;
133: xmlSource = null;
134: result = null;
135: } catch (TransformerException e) {
136: // TODO Auto-generated catch block
137: e.printStackTrace();
138: }
139:
140: parsedText = parsedText + resultFile.toString();
141: }
142: }
143:
144: } catch (Exception e) {
145: e.printStackTrace();
146: }
147: return (parsedText != null) ? parsedText : "";
148: }
149:
150: public void close() {
151:
152: resultFile = null;
153: try {
154: if (instream != null)
155: instream.close();
156: if (infile != null)
157: infile.close();
158: if (entry != null)
159: entry = null;
160: } catch (IOException e) {
161: // TODO Auto-generated catch block
162: System.out
163: .println("PowerPoint2007ToText-Unable to close all resources!!!");
164: }
165: }
166:
167: public String getConvertedContent() {
168: return null;
169: }
170:
171: public String getExtension() {
172: return "txt";
173: }
174:
175: public String getPureText() {
176: return parsedText;
177: }
178:
179: public void setZipFile(ZipInputStream stream, ZipFile file) {
180: instream = stream;
181: infile = file;
182: }
183:
184: public void setEntry(ZipEntry zEntry) {
185: entry = zEntry;
186: }
187:
188: public ArrayList<String> getCustomProperties() {
189: // TODO Auto-generated method stub
190: return null;
191: }
192:
193: public Transformer getTransformer() {
194: return transformer;
195: }
196:
197: public void setTransformer(Transformer transformer) {
198: this .transformer = transformer;
199: }
200:
201: public void setTextTransformer(Transformer transformer) {
202: // TODO Auto-generated method stub
203:
204: }
205:
206: }
|