Source Code Cross Referenced for AODE.java (Science » weka » weka.classifiers.bayes)


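Before the listing: AODE classifies by letting every sufficiently frequent attribute act once as a "super-parent", computing a joint estimate P(x, y) for each such one-dependence model, averaging those estimates per class, and normalizing across classes (see `distributionForInstance` in the source). The following is a minimal, self-contained sketch of just that final averaging-and-normalization step; the class name, method names, and the toy numbers are hypothetical and not part of Weka.

```java
import java.util.Arrays;

// Toy sketch (not Weka code): how AODE combines its one-dependence
// estimators. Each qualifying parent attribute contributes an estimate of
// the joint probability P(x, y); the per-class estimates are averaged and
// the class scores are then normalized into a distribution.
public class AodeSketch {

    /** Mean of the per-parent joint estimates P(x, y) for one class. */
    static double averageSpodes(double[] spodeEstimates) {
        double sum = 0.0;
        for (double p : spodeEstimates) {
            sum += p;
        }
        return sum / spodeEstimates.length;
    }

    /** Scale class scores so they sum to one (the role Utils.normalize plays). */
    static double[] normalize(double[] scores) {
        double total = 0.0;
        for (double s : scores) {
            total += s;
        }
        double[] probs = new double[scores.length];
        for (int i = 0; i < scores.length; i++) {
            probs[i] = scores[i] / total;
        }
        return probs;
    }

    public static void main(String[] args) {
        // Hypothetical P(x, y) estimates from two parent attributes,
        // for a two-class problem (values chosen to be exact in binary).
        double[] classA = {0.125, 0.25};     // mean 0.1875
        double[] classB = {0.0625, 0.0625};  // mean 0.0625
        double[] probs = normalize(new double[] {
                averageSpodes(classA), averageSpodes(classB)});
        System.out.println(Arrays.toString(probs));  // prints [0.75, 0.25]
    }
}
```

When no attribute value meets the frequency limit, the real implementation falls back to a plain naive Bayes estimate instead of this average.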
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    AODE.java
 *    Copyright (C) 2003
 *    Algorithm developed by: Geoff Webb
 *    Code written by: Janice Boughton & Zhihai Wang
 */

package weka.classifiers.bayes;

import weka.classifiers.Classifier;
import weka.classifiers.UpdateableClassifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * AODE achieves highly accurate classification by averaging over all of a small space of alternative naive-Bayes-like models that have weaker (and hence less detrimental) independence assumptions than naive Bayes. The resulting algorithm is computationally efficient while delivering highly accurate classification on many learning tasks.<br/>
 * <br/>
 * For more information, see<br/>
 * <br/>
 * G. Webb, J. Boughton, Z. Wang (2005). Not So Naive Bayes: Aggregating One-Dependence Estimators. Machine Learning. 58(1):5-24.<br/>
 * <br/>
 * Further papers are available at<br/>
 *   http://www.csse.monash.edu.au/~webb/.<br/>
 * <br/>
 * Can use an m-estimate for smoothing base probability estimates in place of the Laplace correction (via option -M).<br/>
 * Default frequency limit set to 1.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Webb2005,
 *    author = {G. Webb and J. Boughton and Z. Wang},
 *    journal = {Machine Learning},
 *    number = {1},
 *    pages = {5-24},
 *    title = {Not So Naive Bayes: Aggregating One-Dependence Estimators},
 *    volume = {58},
 *    year = {2005}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Output debugging information
 * </pre>
 *
 * <pre> -F &lt;int&gt;
 *  Impose a frequency limit for superParents
 *  (default is 1)</pre>
 *
 * <pre> -M
 *  Use m-estimate instead of laplace correction
 * </pre>
 *
 * <pre> -W &lt;int&gt;
 *  Specify a weight to use with m-estimate
 *  (default is 1)</pre>
 *
 <!-- options-end -->
 *
 * @author Janice Boughton (jrbought@csse.monash.edu.au)
 * @author Zhihai Wang (zhw@csse.monash.edu.au)
 * @version $Revision: 1.17 $
 */
public class AODE extends Classifier implements OptionHandler,
        WeightedInstancesHandler, UpdateableClassifier,
        TechnicalInformationHandler {

    /** for serialization */
    static final long serialVersionUID = 9197439980415113523L;

    /**
     * 3D array (m_NumClasses * m_TotalAttValues * m_TotalAttValues)
     * of attribute counts, i.e., the number of times an attribute value occurs
     * in conjunction with another attribute value and a class value.
     */
    private double[][][] m_CondiCounts;

    /** The number of times each class value occurs in the dataset */
    private double[] m_ClassCounts;

    /** The sums of attribute-class counts
     *    -- if there are no missing values for att, then
     *       m_SumForCounts[classVal][att] will be the same as
     *       m_ClassCounts[classVal]
     */
    private double[][] m_SumForCounts;

    /** The number of classes */
    private int m_NumClasses;

    /** The number of attributes in dataset, including class */
    private int m_NumAttributes;

    /** The number of instances in the dataset */
    private int m_NumInstances;

    /** The index of the class attribute */
    private int m_ClassIndex;

    /** The dataset */
    private Instances m_Instances;

    /**
     * The total number of values (including an extra for each attribute's
     * missing value, which are included in m_CondiCounts) for all attributes
     * (not including class). E.g., for three atts each with two possible values,
     * m_TotalAttValues would be 9 (6 values + 3 missing).
     * This variable is used when allocating space for m_CondiCounts matrix.
     */
    private int m_TotalAttValues;

    /** The starting index (in the m_CondiCounts matrix) of the values for each
     * attribute */
    private int[] m_StartAttIndex;

    /** The number of values for each attribute */
    private int[] m_NumAttValues;

    /** The frequency of each attribute value for the dataset */
    private double[] m_Frequencies;

    /** The number of valid class values observed in dataset
     *  -- with no missing classes, this number is the same as m_NumInstances.
     */
    private double m_SumInstances;

    /** An att's frequency must be this value or more to be a superParent */
    private int m_Limit = 1;

    /** If true, outputs debugging info */
    private boolean m_Debug = false;

    /** flag for using m-estimates */
    private boolean m_MEstimates = false;

    /** value for m in m-estimate */
    private int m_Weight = 1;

    /**
     * Returns a string describing this classifier
     * @return a description of the classifier suitable for
     * displaying in the explorer/experimenter gui
     */
    public String globalInfo() {

        return "AODE achieves highly accurate classification by averaging over "
                + "all of a small space of alternative naive-Bayes-like models that have "
                + "weaker (and hence less detrimental) independence assumptions than "
                + "naive Bayes. The resulting algorithm is computationally efficient "
                + "while delivering highly accurate classification on many learning "
                + "tasks.\n\n"
                + "For more information, see\n\n"
                + getTechnicalInformation().toString()
                + "\n\n"
                + "Further papers are available at\n"
                + "  http://www.csse.monash.edu.au/~webb/.\n\n"
                + "Can use an m-estimate for smoothing base probability estimates "
                + "in place of the Laplace correction (via option -M).\n"
                + "Default frequency limit set to 1.";
    }

    /**
     * Returns an instance of a TechnicalInformation object, containing
     * detailed information about the technical background of this class,
     * e.g., paper reference or book this class is based on.
     *
     * @return the technical information about this class
     */
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result;

        result = new TechnicalInformation(Type.ARTICLE);
        result.setValue(Field.AUTHOR, "G. Webb and J. Boughton and Z. Wang");
        result.setValue(Field.YEAR, "2005");
        result.setValue(Field.TITLE,
                "Not So Naive Bayes: Aggregating One-Dependence Estimators");
        result.setValue(Field.JOURNAL, "Machine Learning");
        result.setValue(Field.VOLUME, "58");
        result.setValue(Field.NUMBER, "1");
        result.setValue(Field.PAGES, "5-24");

        return result;
    }

    /**
     * Returns default capabilities of the classifier.
     *
     * @return      the capabilities of this classifier
     */
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();

        // attributes
        result.enable(Capability.NOMINAL_ATTRIBUTES);
        result.enable(Capability.MISSING_VALUES);

        // class
        result.enable(Capability.NOMINAL_CLASS);
        result.enable(Capability.MISSING_CLASS_VALUES);

        // instances
        result.setMinimumNumberInstances(0);

        return result;
    }

    /**
     * Generates the classifier.
     *
     * @param instances set of instances serving as training data
     * @throws Exception if the classifier has not been generated
     * successfully
     */
    public void buildClassifier(Instances instances) throws Exception {

        // can classifier handle the data?
        getCapabilities().testWithFail(instances);

        // remove instances with missing class
        m_Instances = new Instances(instances);
        m_Instances.deleteWithMissingClass();

        // reset variables for this fold
        m_SumInstances = 0;
        m_ClassIndex = instances.classIndex();
        m_NumInstances = m_Instances.numInstances();
        m_NumAttributes = m_Instances.numAttributes();
        m_NumClasses = m_Instances.numClasses();

        // allocate space for attribute reference arrays
        m_StartAttIndex = new int[m_NumAttributes];
        m_NumAttValues = new int[m_NumAttributes];

        m_TotalAttValues = 0;
        for (int i = 0; i < m_NumAttributes; i++) {
            if (i != m_ClassIndex) {
                m_StartAttIndex[i] = m_TotalAttValues;
                m_NumAttValues[i] = m_Instances.attribute(i).numValues();
                m_TotalAttValues += m_NumAttValues[i] + 1;
                // + 1 so there is room for the missing value count
            } else {
                // m_StartAttIndex[i] = -1;  // class isn't included
                m_NumAttValues[i] = m_NumClasses;
            }
        }

        // allocate space for counts and frequencies
        m_CondiCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues];
        m_ClassCounts = new double[m_NumClasses];
        m_SumForCounts = new double[m_NumClasses][m_NumAttributes];
        m_Frequencies = new double[m_TotalAttValues];

        // calculate the counts
        for (int k = 0; k < m_NumInstances; k++) {
            addToCounts((Instance) m_Instances.instance(k));
        }

        // free up some space
        m_Instances = new Instances(m_Instances, 0);
    }

    /**
     * Updates the classifier with the given instance.
     *
     * @param instance the new training instance to include in the model
     */
    public void updateClassifier(Instance instance) {
        this.addToCounts(instance);
    }

    /**
     * Puts an instance's values into m_CondiCounts, m_ClassCounts and
     * m_SumInstances.
     *
     * @param instance  the instance whose values are to be put into the counts
     *                  variables
     */
    private void addToCounts(Instance instance) {

        double[] countsPointer;

        if (instance.classIsMissing())
            return; // ignore instances with missing class

        int classVal = (int) instance.classValue();
        int weight = (int) instance.weight();

        m_ClassCounts[classVal] += weight;
        m_SumInstances += weight;

        // store instance's att val indexes in an array, b/c accessing it
        // in loop(s) is more efficient
        int[] attIndex = new int[m_NumAttributes];
        for (int i = 0; i < m_NumAttributes; i++) {
            if (i == m_ClassIndex)
                attIndex[i] = -1; // we don't use the class attribute in counts
            else {
                if (instance.isMissing(i))
                    attIndex[i] = m_StartAttIndex[i] + m_NumAttValues[i];
                else
                    attIndex[i] = m_StartAttIndex[i] + (int) instance.value(i);
            }
        }

        for (int Att1 = 0; Att1 < m_NumAttributes; Att1++) {
            if (attIndex[Att1] == -1)
                continue; // avoid pointless looping as Att1 is currently the class attribute

            m_Frequencies[attIndex[Att1]] += weight;

            // if this is a missing value, we don't want to increase sumforcounts
            if (!instance.isMissing(Att1))
                m_SumForCounts[classVal][Att1] += weight;

            // save time by referencing this now, rather than do it repeatedly in the loop
            countsPointer = m_CondiCounts[classVal][attIndex[Att1]];

            for (int Att2 = 0; Att2 < m_NumAttributes; Att2++) {
                if (attIndex[Att2] != -1) {
                    countsPointer[attIndex[Att2]] += weight;
                }
            }
        }
    }

    /**
     * Calculates the class membership probabilities for the given test
     * instance.
     *
     * @param instance the instance to be classified
     * @return predicted class probability distribution
     * @throws Exception if there is a problem generating the prediction
     */
    public double[] distributionForInstance(Instance instance)
            throws Exception {

        // accumulates posterior probabilities for each class
        double[] probs = new double[m_NumClasses];

        // index for parent attribute value, and a count of parents used
        int pIndex, parentCount;

        // pointers for efficiency
        // for current class, point to joint frequency for any pair of att values
        double[][] countsForClass;
        // for current class & parent, point to joint frequency for any att value
        double[] countsForClassParent;

        // store instance's att indexes in an int array, so accessing them
        // is more efficient in loop(s).
        int[] attIndex = new int[m_NumAttributes];
        for (int att = 0; att < m_NumAttributes; att++) {
            if (instance.isMissing(att) || att == m_ClassIndex)
                attIndex[att] = -1; // can't use class or missing values in calculations
            else
                attIndex[att] = m_StartAttIndex[att] + (int) instance.value(att);
        }

        // calculate probabilities for each possible class value
        for (int classVal = 0; classVal < m_NumClasses; classVal++) {

            probs[classVal] = 0;
            double spodeP = 0; // P(X,y) for current parent and class
            parentCount = 0;

            countsForClass = m_CondiCounts[classVal];

            // each attribute has a turn of being the parent
            for (int parent = 0; parent < m_NumAttributes; parent++) {
                if (attIndex[parent] == -1)
                    continue; // skip class attribute or missing value

                // determine correct index for the parent in m_CondiCounts matrix
                pIndex = attIndex[parent];

                // check that the att value has a frequency of m_Limit or greater
                if (m_Frequencies[pIndex] < m_Limit)
                    continue;

                countsForClassParent = countsForClass[pIndex];

                // block the parent from being its own child
                attIndex[parent] = -1;

                parentCount++;

                // joint frequency of class and parent
                double classparentfreq = countsForClassParent[pIndex];

                // find the number of missing values for parent's attribute
                double missing4ParentAtt =
                        m_Frequencies[m_StartAttIndex[parent] + m_NumAttValues[parent]];

                // calculate the prior probability -- P(parent & classVal)
                if (!m_MEstimates) {
                    spodeP = (classparentfreq + 1.0)
                            / ((m_SumInstances - missing4ParentAtt)
                                    + m_NumClasses * m_NumAttValues[parent]);
                } else {
                    spodeP = (classparentfreq
                            + ((double) m_Weight / (double) (m_NumClasses * m_NumAttValues[parent])))
                            / ((m_SumInstances - missing4ParentAtt) + m_Weight);
                }

                // take into account the value of each attribute
                for (int att = 0; att < m_NumAttributes; att++) {
                    if (attIndex[att] == -1)
                        continue;

                    double missingForParentandChildAtt =
                            countsForClassParent[m_StartAttIndex[att] + m_NumAttValues[att]];

                    if (!m_MEstimates) {
                        spodeP *= (countsForClassParent[attIndex[att]] + 1.0)
                                / ((classparentfreq - missingForParentandChildAtt)
                                        + m_NumAttValues[att]);
                    } else {
                        spodeP *= (countsForClassParent[attIndex[att]]
                                + ((double) m_Weight / (double) m_NumAttValues[att]))
                                / ((classparentfreq - missingForParentandChildAtt) + m_Weight);
                    }
                }

                // add this probability to the overall probability
                probs[classVal] += spodeP;

                // unblock the parent
                attIndex[parent] = pIndex;
            }

            // check that at least one att was a parent
            if (parentCount < 1) {

                // do plain naive bayes conditional prob
                probs[classVal] = NBconditionalProb(instance, classVal);

            } else {

                // divide by number of parent atts to get the mean
                probs[classVal] /= (double) parentCount;
            }
        }

        Utils.normalize(probs);
        return probs;
    }

    /**
     * Calculates the probability of the specified class for the given test
     * instance, using naive Bayes.
     *
     * @param instance the instance to be classified
     * @param classVal the class for which to calculate the probability
     * @return predicted class probability
     */
    public double NBconditionalProb(Instance instance, int classVal) {

        double prob;
        double[][] pointer;

        // calculate the prior probability
        if (!m_MEstimates) {
            prob = (m_ClassCounts[classVal] + 1.0) / (m_SumInstances + m_NumClasses);
        } else {
            prob = (m_ClassCounts[classVal] + ((double) m_Weight / (double) m_NumClasses))
                    / (m_SumInstances + m_Weight);
        }
        pointer = m_CondiCounts[classVal];

        // consider effect of each att value
        for (int att = 0; att < m_NumAttributes; att++) {
            if (att == m_ClassIndex || instance.isMissing(att))
                continue;

            // determine correct index for att in m_CondiCounts
            int aIndex = m_StartAttIndex[att] + (int) instance.value(att);

            if (!m_MEstimates) {
                prob *= (pointer[aIndex][aIndex] + 1.0)
                        / (m_SumForCounts[classVal][att] + m_NumAttValues[att]);
            } else {
                prob *= (pointer[aIndex][aIndex]
                        + ((double) m_Weight / (double) m_NumAttValues[att]))
                        / (m_SumForCounts[classVal][att] + m_Weight);
            }
        }
        return prob;
    }

    /**
     * Returns an enumeration describing the available options
     *
     * @return an enumeration of all the available options
     */
    public Enumeration listOptions() {

        Vector newVector = new Vector(4);

        newVector.addElement(new Option(
                "\tOutput debugging information\n", "D", 0, "-D"));
        newVector.addElement(new Option(
                "\tImpose a frequency limit for superParents\n"
                        + "\t(default is 1)", "F", 1, "-F <int>"));
        newVector.addElement(new Option(
                "\tUse m-estimate instead of laplace correction\n",
                "M", 0, "-M"));
        newVector.addElement(new Option(
                "\tSpecify a weight to use with m-estimate\n"
                        + "\t(default is 1)", "W", 1, "-W <int>"));
        return newVector.elements();
    }

    /**
     * Parses a given list of options. <p/>
     *
     <!-- options-start -->
     * Valid options are: <p/>
     *
     * <pre> -D
     *  Output debugging information
     * </pre>
     *
     * <pre> -F &lt;int&gt;
     *  Impose a frequency limit for superParents
     *  (default is 1)</pre>
     *
     * <pre> -M
     *  Use m-estimate instead of laplace correction
     * </pre>
     *
     * <pre> -W &lt;int&gt;
     *  Specify a weight to use with m-estimate
     *  (default is 1)</pre>
     *
     <!-- options-end -->
     *
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {

        m_Debug = Utils.getFlag('D', options);

        String Freq = Utils.getOption('F', options);
        if (Freq.length() != 0)
            m_Limit = Integer.parseInt(Freq);
        else
            m_Limit = 1;

        m_MEstimates = Utils.getFlag('M', options);
        String weight = Utils.getOption('W', options);
        if (weight.length() != 0) {
            if (!m_MEstimates)
                throw new Exception(
                        "Can't use Laplace AND m-estimate weight. Choose one.");
            m_Weight = Integer.parseInt(weight);
        } else {
            if (m_MEstimates)
                m_Weight = 1;
        }

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the classifier.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {
        Vector result = new Vector();

        if (m_Debug)
            result.add("-D");

        result.add("-F");
        result.add("" + m_Limit);

        if (m_MEstimates) {
            result.add("-M");
            result.add("-W");
            result.add("" + m_Weight);
        }

        return (String[]) result.toArray(new String[result.size()]);
    }

629:            /**
630:             * Returns the tip text for this property
631:             * @return tip text for this property suitable for
632:             * displaying in the explorer/experimenter gui
633:             */
634:            public String weightTipText() {
635:                return "Set the weight for m-estimate.";
636:            }
637:
638:            /**
639:             * Sets the weight for m-estimate
640:             *
641:             * @param w the weight
642:             */
643:            public void setWeight(int w) {
644:                if (!getUseMEstimates()) {
645:                    System.out
646:                            .println("Weight is only used in conjunction with m-estimate - ignored!");
647:                } else {
648:                    if (w > 0)
649:                        m_Weight = w;
650:                    else
651:                        System.out.println("Weight must be greater than 0!");
652:                }
653:            }
654:
655:            /**
656:             * Gets the weight used in m-estimate
657:             *
658:             * @return the weight
659:             */
660:            public int getWeight() {
661:                return m_Weight;
662:            }
663:
664:            /**
665:             * Returns the tip text for this property
666:             * @return tip text for this property suitable for
667:             * displaying in the explorer/experimenter gui
668:             */
669:            public String useMEstimatesTipText() {
670:                return "Use m-estimate instead of Laplace correction.";
671:            }
672:
673:            /**
674:             * Gets whether m-estimates are being used.
675:             *
676:             * @return Value of m_MEstimates.
677:             */
678:            public boolean getUseMEstimates() {
679:                return m_MEstimates;
680:            }
681:
682:            /**
683:             * Sets whether m-estimates are to be used.
684:             *
685:             * @param value     Value to assign to m_MEstimates.
686:             */
687:            public void setUseMEstimates(boolean value) {
688:                m_MEstimates = value;
689:            }
690:
691:            /**
692:             * Returns the tip text for this property
693:             * @return tip text for this property suitable for
694:             * displaying in the explorer/experimenter gui
695:             */
696:            public String frequencyLimitTipText() {
697:                return "Attributes with a frequency in the train set below "
698:                        + "this value aren't used as parents.";
699:            }
700:
701:            /**
702:             * Sets the frequency limit
703:             *
704:             * @param f the frequency limit
705:             */
706:            public void setFrequencyLimit(int f) {
707:                m_Limit = f;
708:            }
709:
710:            /**
711:             * Gets the frequency limit.
712:             *
713:             * @return the frequency limit
714:             */
715:            public int getFrequencyLimit() {
716:                return m_Limit;
717:            }
718:
719:            /**
720:             * Returns a description of the classifier.
721:             *
722:             * @return a description of the classifier as a string.
723:             */
724:            public String toString() {
725:
726:                StringBuffer text = new StringBuffer();
727:
728:                text.append("The AODE Classifier");
729:                if (m_Instances == null) {
730:                    text.append(": No model built yet.");
731:                } else {
732:                    try {
733:                        for (int i = 0; i < m_NumClasses; i++) {
734:                            // print to string, the prior probabilities of class values
735:                            text
736:                                    .append("\nClass "
737:                                            + m_Instances.classAttribute()
738:                                                    .value(i)
739:                                            + ": Prior probability = "
740:                                            + Utils
741:                                                    .doubleToString(
742:                                                            ((m_ClassCounts[i] + 1) / (m_SumInstances + m_NumClasses)),
743:                                                            4, 2) + "\n\n");
744:                        }
745:
746:                        text.append("Dataset: " + m_Instances.relationName()
747:                                + "\n" + "Instances: " + m_NumInstances + "\n"
748:                                + "Attributes: " + m_NumAttributes + "\n"
749:                                + "Frequency limit for superParents: "
750:                                + m_Limit + "\n");
751:                        text.append("Correction: ");
752:                        if (!m_MEstimates)
753:                            text.append("laplace\n");
754:                        else
755:                            text.append("m-estimate (m=" + m_Weight + ")\n");
756:
757:                    } catch (Exception ex) {
758:                        text.append(ex.getMessage());
759:                    }
760:                }
761:
762:                return text.toString();
763:            }
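The prior probability printed by `toString()` uses the Laplace form `(classCount + 1) / (sumInstances + numClasses)`. With `-M`, AODE substitutes an m-estimate; the sketch below shows the standard m-estimate form `(classCount + m/numClasses) / (sumInstances + m)` for comparison. This is an assumption about the correction's shape based on the usual m-estimate definition, not code taken from AODE:

```java
// PriorSketch: self-contained comparison of the two smoothing corrections
// that toString() reports. Not Weka code.
public class PriorSketch {

    /** Laplace correction, as shown in toString(): add 1 per class value. */
    public static double laplacePrior(double classCount, double sumInstances,
                                      int numClasses) {
        return (classCount + 1.0) / (sumInstances + numClasses);
    }

    /** Standard m-estimate: add m virtual instances, split uniformly. */
    public static double mEstimatePrior(double classCount, double sumInstances,
                                        int numClasses, double m) {
        return (classCount + m / numClasses) / (sumInstances + m);
    }

    public static void main(String[] args) {
        // 40 of 100 training instances in a 2-class problem:
        System.out.println(laplacePrior(40, 100, 2));      // (40 + 1) / (100 + 2)
        System.out.println(mEstimatePrior(40, 100, 2, 1)); // (40 + 0.5) / (100 + 1)
    }
}
```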
764:
765:            /**
766:             * Main method for testing this class.
767:             *
768:             * @param argv the options
769:             */
770:            public static void main(String[] argv) {
771:                runClassifier(new AODE(), argv);
772:            }
773:        }