001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * Classifier.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.classifiers;
024:
025: import weka.core.Attribute;
026: import weka.core.Capabilities;
027: import weka.core.CapabilitiesHandler;
028: import weka.core.Instance;
029: import weka.core.Instances;
030: import weka.core.Option;
031: import weka.core.OptionHandler;
032: import weka.core.SerializedObject;
033: import weka.core.Utils;
034:
035: import java.io.Serializable;
036: import java.util.Enumeration;
037: import java.util.Vector;
038:
039: /**
040: * Abstract classifier. All schemes for numeric or nominal prediction in
041: * Weka extend this class. Note that a classifier MUST either implement
042: * distributionForInstance() or classifyInstance().
043: *
044: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
045: * @author Len Trigg (trigg@cs.waikato.ac.nz)
046: * @version $Revision: 1.17 $
047: */
048: public abstract class Classifier implements Cloneable, Serializable,
049: OptionHandler, CapabilitiesHandler {
050:
051: /** for serialization */
052: private static final long serialVersionUID = 6502780192411755341L;
053:
054: /** Whether the classifier is run in debug mode. */
055: protected boolean m_Debug = false;
056:
057: /**
058: * Generates a classifier. Must initialize all fields of the classifier
059: * that are not being set via options (ie. multiple calls of buildClassifier
060: * must always lead to the same result). Must not change the dataset
061: * in any way.
062: *
063: * @param data set of instances serving as training data
064: * @exception Exception if the classifier has not been
065: * generated successfully
066: */
067: public abstract void buildClassifier(Instances data)
068: throws Exception;
069:
070: /**
071: * Classifies the given test instance. The instance has to belong to a
072: * dataset when it's being classified. Note that a classifier MUST
073: * implement either this or distributionForInstance().
074: *
075: * @param instance the instance to be classified
076: * @return the predicted most likely class for the instance or
077: * Instance.missingValue() if no prediction is made
078: * @exception Exception if an error occurred during the prediction
079: */
080: public double classifyInstance(Instance instance) throws Exception {
081:
082: double[] dist = distributionForInstance(instance);
083: if (dist == null) {
084: throw new Exception("Null distribution predicted");
085: }
086: switch (instance.classAttribute().type()) {
087: case Attribute.NOMINAL:
088: double max = 0;
089: int maxIndex = 0;
090:
091: for (int i = 0; i < dist.length; i++) {
092: if (dist[i] > max) {
093: maxIndex = i;
094: max = dist[i];
095: }
096: }
097: if (max > 0) {
098: return maxIndex;
099: } else {
100: return Instance.missingValue();
101: }
102: case Attribute.NUMERIC:
103: return dist[0];
104: default:
105: return Instance.missingValue();
106: }
107: }
108:
109: /**
110: * Predicts the class memberships for a given instance. If
111: * an instance is unclassified, the returned array elements
112: * must be all zero. If the class is numeric, the array
113: * must consist of only one element, which contains the
114: * predicted value. Note that a classifier MUST implement
115: * either this or classifyInstance().
116: *
117: * @param instance the instance to be classified
118: * @return an array containing the estimated membership
119: * probabilities of the test instance in each class
120: * or the numeric prediction
121: * @exception Exception if distribution could not be
122: * computed successfully
123: */
124: public double[] distributionForInstance(Instance instance)
125: throws Exception {
126:
127: double[] dist = new double[instance.numClasses()];
128: switch (instance.classAttribute().type()) {
129: case Attribute.NOMINAL:
130: double classification = classifyInstance(instance);
131: if (Instance.isMissingValue(classification)) {
132: return dist;
133: } else {
134: dist[(int) classification] = 1.0;
135: }
136: return dist;
137: case Attribute.NUMERIC:
138: dist[0] = classifyInstance(instance);
139: return dist;
140: default:
141: return dist;
142: }
143: }
144:
145: /**
146: * Creates a new instance of a classifier given it's class name and
147: * (optional) arguments to pass to it's setOptions method. If the
148: * classifier implements OptionHandler and the options parameter is
149: * non-null, the classifier will have it's options set.
150: *
151: * @param classifierName the fully qualified class name of the classifier
152: * @param options an array of options suitable for passing to setOptions. May
153: * be null.
154: * @return the newly created classifier, ready for use.
155: * @exception Exception if the classifier name is invalid, or the options
156: * supplied are not acceptable to the classifier
157: */
158: public static Classifier forName(String classifierName,
159: String[] options) throws Exception {
160:
161: return (Classifier) Utils.forName(Classifier.class,
162: classifierName, options);
163: }
164:
165: /**
166: * Creates a deep copy of the given classifier using serialization.
167: *
168: * @param model the classifier to copy
169: * @return a deep copy of the classifier
170: * @exception Exception if an error occurs
171: */
172: public static Classifier makeCopy(Classifier model)
173: throws Exception {
174:
175: return (Classifier) new SerializedObject(model).getObject();
176: }
177:
178: /**
179: * Creates a given number of deep copies of the given classifier using serialization.
180: *
181: * @param model the classifier to copy
182: * @param num the number of classifier copies to create.
183: * @return an array of classifiers.
184: * @exception Exception if an error occurs
185: */
186: public static Classifier[] makeCopies(Classifier model, int num)
187: throws Exception {
188:
189: if (model == null) {
190: throw new Exception("No model classifier set");
191: }
192: Classifier[] classifiers = new Classifier[num];
193: SerializedObject so = new SerializedObject(model);
194: for (int i = 0; i < classifiers.length; i++) {
195: classifiers[i] = (Classifier) so.getObject();
196: }
197: return classifiers;
198: }
199:
200: /**
201: * Returns an enumeration describing the available options.
202: *
203: * @return an enumeration of all the available options.
204: */
205: public Enumeration listOptions() {
206:
207: Vector newVector = new Vector(1);
208:
209: newVector
210: .addElement(new Option(
211: "\tIf set, classifier is run in debug mode and\n"
212: + "\tmay output additional info to the console",
213: "D", 0, "-D"));
214: return newVector.elements();
215: }
216:
217: /**
218: * Parses a given list of options. Valid options are:<p>
219: *
220: * -D <br>
221: * If set, classifier is run in debug mode and
222: * may output additional info to the console.<p>
223: *
224: * @param options the list of options as an array of strings
225: * @exception Exception if an option is not supported
226: */
227: public void setOptions(String[] options) throws Exception {
228:
229: setDebug(Utils.getFlag('D', options));
230: }
231:
232: /**
233: * Gets the current settings of the Classifier.
234: *
235: * @return an array of strings suitable for passing to setOptions
236: */
237: public String[] getOptions() {
238:
239: String[] options;
240: if (getDebug()) {
241: options = new String[1];
242: options[0] = "-D";
243: } else {
244: options = new String[0];
245: }
246: return options;
247: }
248:
249: /**
250: * Set debugging mode.
251: *
252: * @param debug true if debug output should be printed
253: */
254: public void setDebug(boolean debug) {
255:
256: m_Debug = debug;
257: }
258:
259: /**
260: * Get whether debugging is turned on.
261: *
262: * @return true if debugging output is on
263: */
264: public boolean getDebug() {
265:
266: return m_Debug;
267: }
268:
269: /**
270: * Returns the tip text for this property
271: * @return tip text for this property suitable for
272: * displaying in the explorer/experimenter gui
273: */
274: public String debugTipText() {
275: return "If set to true, classifier may output additional info to "
276: + "the console.";
277: }
278:
279: /**
280: * Returns the Capabilities of this classifier. Derived classifiers have to
281: * override this method to enable capabilities.
282: *
283: * @return the capabilities of this object
284: * @see Capabilities
285: */
286: public Capabilities getCapabilities() {
287: return new Capabilities(this );
288: }
289:
290: /**
291: * runs the classifier instance with the given options.
292: *
293: * @param classifier the classifier to run
294: * @param options the commandline options
295: */
296: protected static void runClassifier(Classifier classifier,
297: String[] options) {
298: try {
299: System.out.println(Evaluation.evaluateModel(classifier,
300: options));
301: } catch (Exception e) {
302: if (((e.getMessage() != null) && (e.getMessage().indexOf(
303: "General options") == -1))
304: || (e.getMessage() == null))
305: e.printStackTrace();
306: else
307: System.err.println(e.getMessage());
308: }
309: }
310: }
|