01: /*
02: * This program is free software; you can redistribute it and/or modify
03: * it under the terms of the GNU General Public License as published by
04: * the Free Software Foundation; either version 2 of the License, or
05: * (at your option) any later version.
06: *
07: * This program is distributed in the hope that it will be useful,
08: * but WITHOUT ANY WARRANTY; without even the implied warranty of
09: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10: * GNU General Public License for more details.
11: *
12: * You should have received a copy of the GNU General Public License
13: * along with this program; if not, write to the Free Software
14: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15: */
16:
17: /*
18: * EntropyBasedSplitCrit.java
19: * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20: *
21: */
22:
23: package weka.classifiers.trees.j48;
24:
25: /**
26: * "Abstract" class for computing splitting criteria
27: * based on the entropy of a class distribution.
28: *
29: * @author Eibe Frank (eibe@cs.waikato.ac.nz)
30: * @version $Revision: 1.7 $
31: */
32: public abstract class EntropyBasedSplitCrit extends SplitCriterion {
33:
34: /** for serialization */
35: private static final long serialVersionUID = -2618691439791653056L;
36:
37: /** The log of 2. */
38: protected static double log2 = Math.log(2);
39:
40: /**
41: * Help method for computing entropy.
42: */
43: public final double logFunc(double num) {
44:
45: // Constant hard coded for efficiency reasons
46: if (num < 1e-6)
47: return 0;
48: else
49: return num * Math.log(num) / log2;
50: }
51:
52: /**
53: * Computes entropy of distribution before splitting.
54: */
55: public final double oldEnt(Distribution bags) {
56:
57: double returnValue = 0;
58: int j;
59:
60: for (j = 0; j < bags.numClasses(); j++)
61: returnValue = returnValue + logFunc(bags.perClass(j));
62: return logFunc(bags.total()) - returnValue;
63: }
64:
65: /**
66: * Computes entropy of distribution after splitting.
67: */
68: public final double newEnt(Distribution bags) {
69:
70: double returnValue = 0;
71: int i, j;
72:
73: for (i = 0; i < bags.numBags(); i++) {
74: for (j = 0; j < bags.numClasses(); j++)
75: returnValue = returnValue
76: + logFunc(bags.perClassPerBag(i, j));
77: returnValue = returnValue - logFunc(bags.perBag(i));
78: }
79: return -returnValue;
80: }
81:
82: /**
83: * Computes entropy after splitting without considering the
84: * class values.
85: */
86: public final double splitEnt(Distribution bags) {
87:
88: double returnValue = 0;
89: int i;
90:
91: for (i = 0; i < bags.numBags(); i++)
92: returnValue = returnValue + logFunc(bags.perBag(i));
93: return logFunc(bags.total()) - returnValue;
94: }
95: }
|