001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * Estimator.java
019: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.estimators;
024:
025: import weka.estimators.IncrementalEstimator;
026: import java.io.BufferedReader;
027: import java.io.FileReader;
028: import java.io.InputStreamReader;
029: import java.io.Reader;
030: import java.io.Serializable;
031: import java.util.Enumeration;
032: import java.util.Vector;
033: import weka.core.Capabilities;
034: import weka.core.Capabilities.Capability;
035: import weka.core.CapabilitiesHandler;
036: import weka.core.Instance;
037: import weka.core.Instances;
038: import weka.core.Option;
039: import weka.core.OptionHandler;
040: import weka.core.SerializedObject;
041: import weka.core.Utils;
042:
043: /**
044: *
045: * Abstract class for all estimators.
046: *
047: * Example code for a nonincremental estimator
048: * <code> <pre>
049: * // create a histogram for estimation
050: * EqualWidthEstimator est = new EqualWidthEstimator();
051: * est.addValues(instances, attrIndex);
052: * </pre> </code>
053: *
054: *
055: * Example code for an incremental estimator (incremental
056: * estimators must implement interface IncrementalEstimator)
057: * <code> <pre>
058: * // Create a discrete estimator that takes values 0 to 9
059: * DiscreteEstimator newEst = new DiscreteEstimator(10, true);
060: *
061: * // Create 50 random integers first predicting the probability of the
062: * // value, then adding the value to the estimator
063: * Random r = new Random(seed);
064: * for(int i = 0; i < 50; i++) {
065: * current = Math.abs(r.nextInt() % 10);
066: * System.out.println(newEst);
067: * System.out.println("Prediction for " + current
068: * + " = " + newEst.getProbability(current));
069: * newEst.addValue(current, 1);
070: * }
071: * </pre> </code>
072: *
073: *
074: * Example code for a main method for an estimator.<p>
075: * <code> <pre>
076: * public static void main(String [] argv) {
077: *
078: * try {
079: * LoglikeliEstimator est = new LoglikeliEstimator();
080: * Estimator.buildEstimator((Estimator) est, argv, false);
081: * System.out.println(est.toString());
082: * } catch (Exception ex) {
083: * ex.printStackTrace();
084: * System.out.println(ex.getMessage());
085: * }
086: * }
087: * </pre> </code>
088: *
089: *
090: * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
091: * @author Len Trigg (trigg@cs.waikato.ac.nz)
092: * @version $Revision: 1.9 $
093: */
094: public abstract class Estimator implements Cloneable, Serializable,
095: OptionHandler, CapabilitiesHandler {
096:
/** Serialization version identifier. */
static final long serialVersionUID = -5902411487362274342L;

/** Debugging mode flag; accessed through setDebug() and getDebug(). */
private boolean m_Debug = false;

/**
 * The class value index is > -1 if a subset with a specific class value
 * only is taken. Initialized to -1.0.
 */
protected double m_classValueIndex = -1.0;

/**
 * True as long as the class is not important. Set to false by the
 * addValues variant that selects instances of one class value, and read
 * by getCapabilities() to decide which class capabilities are enabled.
 */
private boolean m_noClass = true;
108:
/**
 * Class to support a building process of an estimator.
 * It only holds the dataset and the indices that are parsed from the
 * command line options before the estimator is built.
 */
private static class Builder implements Serializable {

  /** for serialization */
  private static final long serialVersionUID = -5810927990193597303L;

  /** instances of the builder; null until a dataset has been assigned */
  Instances m_instances = null;

  /** attribute index of the builder; -1 until an index has been assigned */
  int m_attrIndex = -1;

  /** class index of the builder, only relevant if class value index is set */
  int m_classIndex = -1;

  /** class value index of the builder; -1 means that all instances are used */
  int m_classValueIndex = -1;
}
129:
130: /**
131: * Add a new data value to the current estimator.
132: *
133: * @param data the new data value
134: * @param weight the weight assigned to the data value
135: */
136: public void addValue(double data, double weight) {
137: try {
138: throw new Exception(
139: "Method to add single value is not implemented!\n"
140: + "Estimator should implement IncrementalEstimator.");
141: } catch (Exception ex) {
142: ex.printStackTrace();
143: System.out.println(ex.getMessage());
144: }
145: }
146:
147: /**
148: * Initialize the estimator with a new dataset.
149: * Finds min and max first.
150: *
151: * @param data the dataset used to build this estimator
152: * @param attrIndex attribute the estimator is for
153: * @exception Exception if building of estimator goes wrong
154: */
155: public void addValues(Instances data, int attrIndex)
156: throws Exception {
157: // can estimator handle the data?
158: getCapabilities().testWithFail(data);
159:
160: double[] minMax = new double[2];
161:
162: try {
163: EstimatorUtils.getMinMax(data, attrIndex, minMax);
164: } catch (Exception ex) {
165: ex.printStackTrace();
166: System.out.println(ex.getMessage());
167: }
168:
169: double min = minMax[0];
170: double max = minMax[1];
171:
172: // factor is 1.0, data set has not been reduced
173: addValues(data, attrIndex, min, max, 1.0);
174: }
175:
176: /**
177: * Initialize the estimator with all values of one attribute of a dataset.
178: * Some estimator might ignore the min and max values.
179: *
180: * @param data the dataset used to build this estimator
181: * @param attrIndex attribute the estimator is for
182: * @param min minimal border of range
183: * @param max maximal border of range
184: * @param factor number of instances has been reduced to that factor
185: * @exception Exception if building of estimator goes wrong
186: */
187: public void addValues(Instances data, int attrIndex, double min,
188: double max, double factor) throws Exception {
189: // no handling of factor, would have to be overridden
190:
191: // no handling of min and max, would have to be overridden
192:
193: int numInst = data.numInstances();
194: for (int i = 1; i < numInst; i++) {
195: addValue(data.instance(i).value(attrIndex), 1.0);
196: }
197: }
198:
199: /**
200: * Initialize the estimator using only the instance of one class.
201: * It is using the values of one attribute only.
202: *
203: * @param data the dataset used to build this estimator
204: * @param attrIndex attribute the estimator is for
205: * @param classIndex index of the class attribute
206: * @param classValue the class value
207: * @exception Exception if building of estimator goes wrong
208: */
209: public void addValues(Instances data, int attrIndex,
210: int classIndex, int classValue) throws Exception {
211: // can estimator handle the data?
212: m_noClass = false;
213: getCapabilities().testWithFail(data);
214:
215: // find the minimal and the maximal value
216: double[] minMax = new double[2];
217:
218: try {
219: EstimatorUtils.getMinMax(data, attrIndex, minMax);
220: } catch (Exception ex) {
221: ex.printStackTrace();
222: System.out.println(ex.getMessage());
223: }
224:
225: double min = minMax[0];
226: double max = minMax[1];
227:
228: // extract the instances with the given class value
229: Instances workData = new Instances(data, 0);
230: double factor = getInstancesFromClass(data, attrIndex,
231: classIndex, (double) classValue, workData);
232:
233: // if no data return
234: if (workData.numInstances() == 0)
235: return;
236:
237: addValues(data, attrIndex, min, max, factor);
238: }
239:
240: /**
241: * Initialize the estimator using only the instance of one class.
242: * It is using the values of one attribute only.
243: *
244: * @param data the dataset used to build this estimator
245: * @param attrIndex attribute the estimator is for
246: * @param classIndex index of the class attribute
247: * @param classValue the class value
248: * @param min minimal value of this attribute
249: * @param max maximal value of this attribute
250: * @exception Exception if building of estimator goes wrong
251: */
252: public void addValues(Instances data, int attrIndex,
253: int classIndex, int classValue, double min, double max)
254: throws Exception {
255:
256: // extract the instances with the given class value
257: Instances workData = new Instances(data, 0);
258: double factor = getInstancesFromClass(data, attrIndex,
259: classIndex, (double) classValue, workData);
260:
261: // if no data return
262: if (workData.numInstances() == 0)
263: return;
264:
265: addValues(data, attrIndex, min, max, factor);
266: }
267:
268: /**
269: * Returns a dataset that contains all instances of a certain class value.
270: *
271: * @param data dataset to select the instances from
272: * @param attrIndex index of the relevant attribute
273: * @param classIndex index of the class attribute
274: * @param classValue the relevant class value
275: * @return a dataset with only
276: */
277: private double getInstancesFromClass(Instances data, int attrIndex,
278: int classIndex, double classValue, Instances workData) {
279: //DBO.pln("getInstancesFromClass classValue"+classValue+" workData"+data.numInstances());
280:
281: int num = 0;
282: int numClassValue = 0;
283: for (int i = 0; i < data.numInstances(); i++) {
284: if (!data.instance(i).isMissing(attrIndex)) {
285: num++;
286: if (data.instance(i).value(classIndex) == classValue) {
287: workData.add(data.instance(i));
288: numClassValue++;
289: }
290: }
291: }
292:
293: Double alphaFactor = new Double((double) numClassValue
294: / (double) num);
295: return alphaFactor;
296: }
297:
/**
 * Get a probability estimate for a value.
 * Abstract: no implementation is provided in this class.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public abstract double getProbability(double data);
305:
306: /**
307: * Build an estimator using the options. The data is given in the options.
308: *
309: * @param est the estimator used
310: * @param options the list of options
311: * @param isIncremental true if estimator is incremental
312: * @exception Exception if something goes wrong or the user requests help on
313: * command options
314: */
315: public static void buildEstimator(Estimator est, String[] options,
316: boolean isIncremental) throws Exception {
317: //DBO.pln("buildEstimator");
318:
319: boolean debug = false;
320: boolean helpRequest;
321:
322: // read all options
323: Builder build = new Builder();
324: try {
325: setGeneralOptions(build, est, options);
326:
327: if (est instanceof OptionHandler) {
328: ((OptionHandler) est).setOptions(options);
329: }
330:
331: Utils.checkForRemainingOptions(options);
332:
333: buildEstimator(est, build.m_instances, build.m_attrIndex,
334: build.m_classIndex, build.m_classValueIndex,
335: isIncremental);
336: } catch (Exception ex) {
337: ex.printStackTrace();
338: System.out.println(ex.getMessage());
339: String specificOptions = "";
340: // Output the error and also the valid options
341: if (est instanceof OptionHandler) {
342: specificOptions += "\nEstimator options:\n\n";
343: Enumeration enumOptions = ((OptionHandler) est)
344: .listOptions();
345: while (enumOptions.hasMoreElements()) {
346: Option option = (Option) enumOptions.nextElement();
347: specificOptions += option.synopsis() + '\n'
348: + option.description() + "\n";
349: }
350: }
351:
352: String genericOptions = "\nGeneral options:\n\n"
353: + "-h\n"
354: + "\tGet help on available options.\n"
355: + "-i <file>\n"
356: + "\tThe name of the file containing input instances.\n"
357: + "\tIf not supplied then instances will be read from stdin.\n"
358: + "-a <attribute index>\n"
359: + "\tThe number of the attribute the probability distribution\n"
360: + "\testimation is done for.\n"
361: + "\t\"first\" and \"last\" are also valid entries.\n"
362: + "\tIf not supplied then no class is assigned.\n"
363: + "-c <class index>\n"
364: + "\tIf class value index is set, this attribute is taken as class.\n"
365: + "\t\"first\" and \"last\" are also valid entries.\n"
366: + "\tIf not supplied then last is default.\n"
367: + "-v <class value index>\n"
368: + "\tIf value is different to -1, select instances of this class value.\n"
369: + "\t\"first\" and \"last\" are also valid entries.\n"
370: + "\tIf not supplied then all instances are taken.\n";
371:
372: throw new Exception('\n' + ex.getMessage()
373: + specificOptions + genericOptions);
374: }
375: }
376:
377: public static void buildEstimator(Estimator est,
378: Instances instances, int attrIndex, int classIndex,
379: int classValueIndex, boolean isIncremental)
380: throws Exception {
381:
382: // DBO.pln("buildEstimator 2 " + classValueIndex);
383:
384: // non-incremental estimator add all instances at once
385: if (!isIncremental) {
386:
387: if (classValueIndex == -1) {
388: // DBO.pln("before addValues -- Estimator");
389: est.addValues(instances, attrIndex);
390: } else {
391: // DBO.pln("before addValues with classvalue -- Estimator");
392: est.addValues(instances, attrIndex, classIndex,
393: classValueIndex);
394: }
395: } else {
396: // incremental estimator, read one value at a time
397: Enumeration enumInsts = (instances).enumerateInstances();
398: while (enumInsts.hasMoreElements()) {
399: Instance instance = (Instance) enumInsts.nextElement();
400: ((IncrementalEstimator) est).addValue(instance
401: .value(attrIndex), instance.weight());
402: }
403: }
404: }
405:
406: /**
407: * Parses and sets the general options
408: * @param build contains the data used
409: * @param est the estimator used
410: * @param options the options from the command line
411: */
412: private static void setGeneralOptions(Builder build, Estimator est,
413: String[] options) throws Exception {
414: Reader input = null;
415:
416: // help request option
417: boolean helpRequest = Utils.getFlag('h', options);
418: if (helpRequest) {
419: throw new Exception("Help requested.\n");
420: }
421:
422: // instances used
423: String infileName = Utils.getOption('i', options);
424: if (infileName.length() != 0) {
425: input = new BufferedReader(new FileReader(infileName));
426: } else {
427: input = new BufferedReader(new InputStreamReader(System.in));
428: }
429:
430: build.m_instances = new Instances(input);
431:
432: // attribute index
433: String attrIndex = Utils.getOption('a', options);
434:
435: if (attrIndex.length() != 0) {
436: if (attrIndex.equals("first")) {
437: build.m_attrIndex = 0;
438: } else if (attrIndex.equals("last")) {
439: build.m_attrIndex = build.m_instances.numAttributes() - 1;
440: } else {
441: int index = Integer.parseInt(attrIndex) - 1;
442: if ((index < 0)
443: || (index >= build.m_instances.numAttributes())) {
444: throw new IllegalArgumentException(
445: "Option a: attribute index out of range.");
446: }
447: build.m_attrIndex = index;
448:
449: }
450: } else {
451: // default is the first attribute
452: build.m_attrIndex = 0;
453: }
454:
455: //class index, if not given is set to last attribute
456: String classIndex = Utils.getOption('c', options);
457: if (classIndex.length() == 0)
458: classIndex = "last";
459:
460: if (classIndex.length() != 0) {
461: if (classIndex.equals("first")) {
462: build.m_classIndex = 0;
463: } else if (classIndex.equals("last")) {
464: build.m_classIndex = build.m_instances.numAttributes() - 1;
465: } else {
466: int cl = Integer.parseInt(classIndex);
467: if (cl == -1) {
468: build.m_classIndex = build.m_instances
469: .numAttributes() - 1;
470: } else {
471: build.m_classIndex = cl - 1;
472: }
473: }
474: }
475:
476: //class value index, if not given is set to -1
477: String classValueIndex = Utils.getOption('v', options);
478: if (classValueIndex.length() != 0) {
479: if (classValueIndex.equals("first")) {
480: build.m_classValueIndex = 0;
481: } else if (classValueIndex.equals("last")) {
482: build.m_classValueIndex = build.m_instances
483: .numAttributes() - 1;
484: } else {
485: int cl = Integer.parseInt(classValueIndex);
486: if (cl == -1) {
487: build.m_classValueIndex = -1;
488: } else {
489: build.m_classValueIndex = cl - 1;
490: }
491: }
492: }
493:
494: build.m_instances.setClassIndex(build.m_classIndex);
495: }
496:
497: /**
498: * Creates a deep copy of the given estimator using serialization.
499: *
500: * @param model the estimator to copy
501: * @return a deep copy of the estimator
502: * @exception Exception if an error occurs
503: */
504: public static Estimator clone(Estimator model) throws Exception {
505:
506: return makeCopy(model);
507: }
508:
509: /**
510: * Creates a deep copy of the given estimator using serialization.
511: *
512: * @param model the estimator to copy
513: * @return a deep copy of the estimator
514: * @exception Exception if an error occurs
515: */
516: public static Estimator makeCopy(Estimator model) throws Exception {
517:
518: return (Estimator) new SerializedObject(model).getObject();
519: }
520:
521: /**
522: * Creates a given number of deep copies of the given estimator using serialization.
523: *
524: * @param model the estimator to copy
525: * @param num the number of estimator copies to create.
526: * @return an array of estimators.
527: * @exception Exception if an error occurs
528: */
529: public static Estimator[] makeCopies(Estimator model, int num)
530: throws Exception {
531:
532: if (model == null) {
533: throw new Exception("No model estimator set");
534: }
535: Estimator[] estimators = new Estimator[num];
536: SerializedObject so = new SerializedObject(model);
537: for (int i = 0; i < estimators.length; i++) {
538: estimators[i] = (Estimator) so.getObject();
539: }
540: return estimators;
541: }
542:
543: /**
544: * Tests whether the current estimation object is equal to another
545: * estimation object
546: *
547: * @param obj the object to compare against
548: * @return true if the two objects are equal
549: */
550: public boolean equals(Object obj) {
551:
552: if ((obj == null) || !(obj.getClass().equals(this .getClass()))) {
553: return false;
554: }
555: Estimator cmp = (Estimator) obj;
556: if (m_Debug != cmp.m_Debug)
557: return false;
558: if (m_classValueIndex != cmp.m_classValueIndex)
559: return false;
560: if (m_noClass != cmp.m_noClass)
561: return false;
562:
563: return true;
564: }
565:
566: /**
567: * Returns an enumeration describing the available options.
568: *
569: * @return an enumeration of all the available options.
570: */
571: public Enumeration listOptions() {
572:
573: Vector newVector = new Vector(1);
574:
575: newVector
576: .addElement(new Option(
577: "\tIf set, estimator is run in debug mode and\n"
578: + "\tmay output additional info to the console",
579: "D", 0, "-D"));
580: return newVector.elements();
581: }
582:
583: /**
584: * Parses a given list of options. Valid options are:<p>
585: *
586: * -D <br>
587: * If set, estimator is run in debug mode and
588: * may output additional info to the console.<p>
589: *
590: * @param options the list of options as an array of strings
591: * @exception Exception if an option is not supported
592: */
593: public void setOptions(String[] options) throws Exception {
594:
595: setDebug(Utils.getFlag('D', options));
596: }
597:
598: /**
599: * Gets the current settings of the Estimator.
600: *
601: * @return an array of strings suitable for passing to setOptions
602: */
603: public String[] getOptions() {
604:
605: String[] options;
606: if (getDebug()) {
607: options = new String[1];
608: options[0] = "-D";
609: } else {
610: options = new String[0];
611: }
612: return options;
613: }
614:
615: /**
616: * Creates a new instance of a estimatorr given it's class name and
617: * (optional) arguments to pass to it's setOptions method. If the
618: * classifier implements OptionHandler and the options parameter is
619: * non-null, the classifier will have it's options set.
620: *
621: * @param name the fully qualified class name of the estimatorr
622: * @param options an array of options suitable for passing to setOptions. May
623: * be null.
624: * @return the newly created classifier, ready for use.
625: * @exception Exception if the classifier name is invalid, or the options
626: * supplied are not acceptable to the classifier
627: */
628: public static Estimator forName(String name, String[] options)
629: throws Exception {
630:
631: return (Estimator) Utils
632: .forName(Estimator.class, name, options);
633: }
634:
635: /**
636: * Set debugging mode.
637: *
638: * @param debug true if debug output should be printed
639: */
640: public void setDebug(boolean debug) {
641:
642: m_Debug = debug;
643: }
644:
645: /**
646: * Get whether debugging is turned on.
647: *
648: * @return true if debugging output is on
649: */
650: public boolean getDebug() {
651:
652: return m_Debug;
653: }
654:
655: /**
656: * Returns the tip text for this property
657: * @return tip text for this property suitable for
658: * displaying in the explorer/experimenter gui
659: */
660: public String debugTipText() {
661: return "If set to true, estimator may output additional info to "
662: + "the console.";
663: }
664:
665: /**
666: * Returns the Capabilities of this Estimator. Derived estimators have to
667: * override this method to enable capabilities.
668: *
669: * @return the capabilities of this object
670: * @see Capabilities
671: */
672: public Capabilities getCapabilities() {
673: Capabilities result = new Capabilities(this );
674:
675: // class
676: if (!m_noClass) {
677: result.enable(Capability.NOMINAL_CLASS);
678: result.enable(Capability.MISSING_CLASS_VALUES);
679: } else {
680: result.enable(Capability.NO_CLASS);
681: }
682:
683: return result;
684: }
685:
686: /**
687: * Test if the estimator can handle the data.
688: * @param data the dataset the estimator takes an attribute from
689: * @param attrIndex the index of the attribute
690: * @see Capabilities
691: */
692: public void testCapabilities(Instances data, int attrIndex)
693: throws Exception {
694: getCapabilities().testWithFail(data);
695: getCapabilities().testWithFail(data.attribute(attrIndex));
696: }
697: }
|