001: /*
002: * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005: package com.sun.portal.ubt.report.data.file.parser;
006:
007: import java.io.IOException;
008: import java.util.HashMap;
009: import java.util.Map;
010:
011: /**
012: * Decorate a CSVParse object to provide an index of field names. Many (most?)
013: * CSV files have a list of field names (labels) as the first line. A
014: * LabeledCSVParser will consume this line automatically. The methods
015: * {@link #getLabels()}, {@link #getLabelIndex(String)} and
016: * {@link #getValueByLabel(String)} allow these labels to be discovered and
017: * used while parsing CSV data. This class can also be used to conveniently
018: * ignore field labels if they happen to be present in a CSV file and are not
019: * desired.
020: *
021: */
022: public class LabeledCSVParser implements CSVParse {
023:
024: /**
025: * Class which actually does the parsing. Called for most methods.
026: *
027: */
028: private CSVParse parse;
029:
030: /**
031: * The first line of the CSV file - treated specially as labels.
032: * Set by setLabels.
033: *
034: */
035: private String[] labels;
036:
037: /**
038: * Hash of the labels (String) to column number (Integer).
039: * Set by setLabels.
040: *
041: */
042: private Map labelMap;
043:
044: /**
045: * The last line read from the CSV file. Saved for getValueByLabel().
046: *
047: */
048: private String[] lastLine;
049:
050: /**
051: * Set whenever nextValue is called and checked when getValueByLabel() is
052: * called to enforce incompatibility between the methods.
053: *
054: */
055: private int nextValueLine = -2;
056:
057: private boolean labelSet = false;
058:
059: /**
060: * Construct a LabeledCSVParser on a CSVParse implementation.
061: *
062: * @param parse CSVParse implementation
063: * @throws IOException if an error occurs while reading.
064: *
065: */
066: public LabeledCSVParser(CSVParse parse) throws IOException {
067: this .parse = parse;
068: //setLabels();
069: }
070:
071: /**
072: * Change this parser so that it uses a new delimiter.
073: * <p>
074: * The initial character is a comma, the delimiter cannot be changed
075: * to a quote or other character that has special meaning in CSV.
076: *
077: * @param newDelim delimiter to which to switch.
078: * @throws com.sun.portal.ubt.report.data.file.parser.BadDelimiterException if the character cannot be used as a delimiter.
079: *
080: */
081: public void changeDelimiter(char newDelim)
082: throws BadDelimiterException {
083: parse.changeDelimiter(newDelim);
084: }
085:
086: /**
087: * Change this parser so that it uses a new character for quoting.
088: * <p>
089: * The initial character is a double quote ("), the delimiter cannot be changed
090: * to a comma or other character that has special meaning in CSV.
091: *
092: * @param newQuote character to use for quoting.
093: * @throws BadQuoteException if the character cannot be used as a quote.
094: *
095: */
096: public void changeQuote(char newQuote) throws BadQuoteException {
097: parse.changeQuote(newQuote);
098: }
099:
100: /**
101: * Get all the values from the file.
102: * <p>
103: * If the file has already been partially read, only the
104: * values that have not already been read will be included.
105: * <p>
106: * Each line of the file that has at least one value will be
107: * represented. Comments and empty lines are ignored.
108: * <p>
109: * The resulting double array may be jagged.
110: * <p>
111: * The last line of the values is saved and may be accessed
112: * by getValueByLabel().
113: *
114: * @return all the values from the file or null if there are no more values.
115: * @throws IOException if an error occurs while reading.
116: *
117: */
118: public String[][] getAllValues() throws IOException {
119: String[][] allValues = parse.getAllValues();
120: lastLine = allValues[allValues.length - 1];
121: return allValues;
122: }
123:
124: /**
125: * Get the line number that the last token came from.
126: * <p>
127: * New line breaks that occur in the middle of a token are not
128: * counted in the line number count.
129: * <p>
130: * The first line of labels does not count towards the line number.
131: *
132: * @return line number or -1 if no tokens have been returned yet.
133: *
134: */
135: public int getLastLineNumber() {
136: return lastLineNumber();
137: }
138:
139: /**
140: * Get the line number that the last token came from.
141: * <p>
142: * New line breaks that occur in the middle of a token are not
143: * counted in the line number count.
144: * <p>
145: * The first line of labels does not count towards the line number.
146: *
147: * @return line number or -1 if no tokens have been returned yet.
148: *
149: */
150: public int lastLineNumber() {
151: int lineNum = parse.getLastLineNumber();
152: if (lineNum <= -1)
153: return -1; // Nothing has been read yet
154: if (lineNum == 1)
155: return -1; // only labels have been read
156: return lineNum - 1; // adjust line number to account for the label line
157: }
158:
159: /**
160: * Get all the values from a line.
161: * <p>
162: * If the line has already been partially read, only the values that have not
163: * already been read will be included.
164: * <p>
165: * In addition to returning all the values from a line, LabeledCSVParser
166: * maintains a buffer of the values. This feature allows
167: * {@link #getValueByLabel(String)} to function. In this case
168: * {@link #getLine()} is used simply to iterate CSV data. The iteration ends
169: * when null is returned.
170: * <p>
171: * <b>Note:</b> The methods {@link #nextValue()} and {@link #getAllValues()}
172: * are incompatible with {@link #getValueByLabel(String)} because the former
173: * methods cause the offset of field values to shift and corrupt the internal
174: * buffer maintained by {@link #getLine}.
175: *
176: * @return all the values from the line or null if there are no more values.
177: * @throws IOException if an error occurs while reading.
178: *
179: */
180: public String[] getLine() throws IOException {
181: lastLine = parse.getLine();
182: return lastLine;
183: }
184:
185: /**
186: * Read the next value from the file. The line number from
187: * which this value was taken can be obtained from getLastLineNumber().
188: * <p>
189: * This method is not compatible with getValueByLabel(). Using this
190: * method will make getValueByLabel() throw an IllegalStateException
191: * for the rest of the line.
192: *
193: * @return the next value or null if there are no more values.
194: * @throws IOException if an error occurs while reading.
195: *
196: */
197: public String nextValue() throws IOException {
198: String nextValue = parse.nextValue();
199: nextValueLine = getLastLineNumber();
200: return nextValue;
201: }
202:
203: /**
204: * Initialize the LabeledCSVParser.labels member and LabeledCSVParser.labelMap
205: * member.
206: *
207: * @throws java.io.IOException
208: *
209: */
210: private void setLabels() throws IOException {
211: if (labelSet)
212: return;
213: labels = parse.getLine();
214: if (labels == null)
215: return;
216: labelMap = new HashMap();
217: for (int i = 0; i < labels.length; i++) {
218: labelMap.put(labels[i], new Integer(i));
219: }
220: }
221:
222: public void setLabels(HashMap labelMap) throws IOException {
223: if (labelMap == null)
224: return;
225: labelSet = true;
226: this .labelMap = labelMap;
227: }
228:
229: /**
230: * Return an array of all field names from the top
231: * of the CSV file.
232: *
233: * @return Field names.
234: *
235: */
236: public String[] getLabels() throws IOException {
237: return labels;
238: }
239:
240: /**
241: * Get the index of the column having the given label.
242: * The {@link #getLine()} method returns an
243: * array of field values for a single record of data. This method returns
244: * the index of a member of that array based on the specified field name.
245: * The first field has the index 0.
246: *
247: * @param label The field name.
248: * @return The index of the field name, or -1 if the label does not exist.
249: *
250: */
251: public int getLabelIndex(String label) {
252: if (labelMap == null)
253: return -1;
254: if (!labelMap.containsKey(label))
255: return -1;
256: return ((Integer) labelMap.get(label)).intValue();
257: }
258:
259: /**
260: * Given the label for the column, get the column from the last line that
261: * was read. If the column cannot be found in the line, null is returned.
262: *
263: * @param label The field name.
264: * @throws IllegalStateException if nextValue has been called as part of getting the last line. nextValue is not compatible with this method.
265: *
266: */
267: public String getValueByLabel(String label)
268: throws IllegalStateException {
269: if (nextValueLine == getLastLineNumber())
270: throw new IllegalStateException(
271: "nextValue() was used to get values from this line.");
272: if (lastLine == null)
273: return null;
274: int fieldIndex = getLabelIndex(label);
275: if (fieldIndex == -1)
276: return null;
277: if (fieldIndex >= lastLine.length)
278: return null;
279: return lastLine[fieldIndex];
280: }
281:
282: /**
283: * Close any stream upon which this parser is based.
284: *
285: * @throws IOException if an error occurs while closing the stream.
286: *
287: */
288: public void close() throws IOException {
289: parse.close();
290: }
291: }
|