001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * TabuSearch.java
019: * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.classifiers.bayes.net.search.local;
024:
025: import weka.classifiers.bayes.BayesNet;
026: import weka.core.Instances;
027: import weka.core.Option;
028: import weka.core.TechnicalInformation;
029: import weka.core.TechnicalInformation.Type;
030: import weka.core.TechnicalInformation.Field;
031: import weka.core.TechnicalInformationHandler;
032: import weka.core.Utils;
033:
034: import java.util.Enumeration;
035: import java.util.Vector;
036:
037: /**
038: <!-- globalinfo-start -->
039: * This Bayes Network learning algorithm uses tabu search for finding a well scoring Bayes network structure. Tabu search is hill climbing till an optimum is reached. The following step is the least worst possible step. The last X steps are kept in a list and none of the steps in this so called tabu list is considered in taking the next step. The best network found in this traversal is returned.<br/>
040: * <br/>
041: * For more information see:<br/>
042: * <br/>
043: * R.R. Bouckaert (1995). Bayesian Belief Networks: from Construction to Inference. Utrecht, Netherlands.
044: * <p/>
045: <!-- globalinfo-end -->
046: *
047: <!-- technical-bibtex-start -->
048: * BibTeX:
049: * <pre>
050: * @phdthesis{Bouckaert1995,
051: * address = {Utrecht, Netherlands},
052: * author = {R.R. Bouckaert},
053: * institution = {University of Utrecht},
054: * title = {Bayesian Belief Networks: from Construction to Inference},
055: * year = {1995}
056: * }
057: * </pre>
058: * <p/>
059: <!-- technical-bibtex-end -->
060: *
061: <!-- options-start -->
062: * Valid options are: <p/>
063: *
064: * <pre> -L <integer>
065: * Tabu list length</pre>
066: *
067: * <pre> -U <integer>
068: * Number of runs</pre>
069: *
070: * <pre> -P <nr of parents>
071: * Maximum number of parents</pre>
072: *
073: * <pre> -R
074: * Use arc reversal operation.
075: * (default false)</pre>
076: *
077: * <pre> -P <nr of parents>
078: * Maximum number of parents</pre>
079: *
080: * <pre> -R
081: * Use arc reversal operation.
082: * (default false)</pre>
083: *
084: * <pre> -N
085: * Initial structure is empty (instead of Naive Bayes)</pre>
086: *
087: * <pre> -mbc
088: * Applies a Markov Blanket correction to the network structure,
089: * after a network structure is learned. This ensures that all
090: * nodes in the network are part of the Markov blanket of the
091: * classifier node.</pre>
092: *
093: * <pre> -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
094: * Score type (BAYES, BDeu, MDL, ENTROPY and AIC)</pre>
095: *
096: <!-- options-end -->
097: *
098: * @author Remco Bouckaert (rrb@xm.co.nz)
099: * @version $Revision: 1.4 $
100: */
101: public class TabuSearch extends HillClimber implements
102: TechnicalInformationHandler {
103:
104: /** for serialization */
105: static final long serialVersionUID = 1457344073228786447L;
106:
107: /** number of runs **/
108: int m_nRuns = 10;
109:
110: /** size of tabu list **/
111: int m_nTabuList = 5;
112:
113: /** the actual tabu list **/
114: Operation[] m_oTabuList = null;
115:
116: /**
117: * Returns an instance of a TechnicalInformation object, containing
118: * detailed information about the technical background of this class,
119: * e.g., paper reference or book this class is based on.
120: *
121: * @return the technical information about this class
122: */
123: public TechnicalInformation getTechnicalInformation() {
124: TechnicalInformation result;
125:
126: result = new TechnicalInformation(Type.PHDTHESIS);
127: result.setValue(Field.AUTHOR, "R.R. Bouckaert");
128: result.setValue(Field.YEAR, "1995");
129: result
130: .setValue(Field.TITLE,
131: "Bayesian Belief Networks: from Construction to Inference");
132: result.setValue(Field.INSTITUTION, "University of Utrecht");
133: result.setValue(Field.ADDRESS, "Utrecht, Netherlands");
134:
135: return result;
136: }
137:
138: /**
139: * search determines the network structure/graph of the network
140: * with the Tabu search algorithm.
141: *
142: * @param bayesNet the network
143: * @param instances the data to use
144: * @throws Exception if something goes wrong
145: */
146: protected void search(BayesNet bayesNet, Instances instances)
147: throws Exception {
148: m_oTabuList = new Operation[m_nTabuList];
149: int iCurrentTabuList = 0;
150: initCache(bayesNet, instances);
151:
152: // keeps track of score pf best structure found so far
153: double fBestScore;
154: double fCurrentScore = 0.0;
155: for (int iAttribute = 0; iAttribute < instances.numAttributes(); iAttribute++) {
156: fCurrentScore += calcNodeScore(iAttribute);
157: }
158:
159: // keeps track of best structure found so far
160: BayesNet bestBayesNet;
161:
162: // initialize bestBayesNet
163: fBestScore = fCurrentScore;
164: bestBayesNet = new BayesNet();
165: bestBayesNet.m_Instances = instances;
166: bestBayesNet.initStructure();
167: copyParentSets(bestBayesNet, bayesNet);
168:
169: // go do the search
170: for (int iRun = 0; iRun < m_nRuns; iRun++) {
171: Operation oOperation = getOptimalOperation(bayesNet,
172: instances);
173: performOperation(bayesNet, instances, oOperation);
174: // sanity check
175: if (oOperation == null) {
176: throw new Exception(
177: "Panic: could not find any step to make. Tabu list too long?");
178: }
179: // update tabu list
180: m_oTabuList[iCurrentTabuList] = oOperation;
181: iCurrentTabuList = (iCurrentTabuList + 1) % m_nTabuList;
182:
183: fCurrentScore += oOperation.m_fDeltaScore;
184: // keep track of best network seen so far
185: if (fCurrentScore > fBestScore) {
186: fBestScore = fCurrentScore;
187: copyParentSets(bestBayesNet, bayesNet);
188: }
189:
190: if (bayesNet.getDebug()) {
191: printTabuList();
192: }
193: }
194:
195: // restore current network to best network
196: copyParentSets(bayesNet, bestBayesNet);
197:
198: // free up memory
199: bestBayesNet = null;
200: m_Cache = null;
201: } // search
202:
203: /**
204: * copyParentSets copies parent sets of source to dest BayesNet
205: *
206: * @param dest destination network
207: * @param source source network
208: */
209: void copyParentSets(BayesNet dest, BayesNet source) {
210: int nNodes = source.getNrOfNodes();
211: // clear parent set first
212: for (int iNode = 0; iNode < nNodes; iNode++) {
213: dest.getParentSet(iNode).copy(source.getParentSet(iNode));
214: }
215: } // CopyParentSets
216:
217: /**
218: * check whether the operation is not in the tabu list
219: *
220: * @param oOperation operation to be checked
221: * @return true if operation is not in the tabu list
222: */
223: boolean isNotTabu(Operation oOperation) {
224: for (int iTabu = 0; iTabu < m_nTabuList; iTabu++) {
225: if (oOperation.equals(m_oTabuList[iTabu])) {
226: return false;
227: }
228: }
229: return true;
230: } // isNotTabu
231:
232: /** print tabu list for debugging purposes.
233: */
234: void printTabuList() {
235: for (int i = 0; i < m_nTabuList; i++) {
236: Operation o = m_oTabuList[i];
237: if (o != null) {
238: if (o.m_nOperation == 0) {
239: System.out.print(" +(");
240: } else {
241: System.out.print(" -(");
242: }
243: System.out.print(o.m_nTail + "->" + o.m_nHead + ")");
244: }
245: }
246: System.out.println();
247: } // printTabuList
248:
249: /**
250: * @return number of runs
251: */
252: public int getRuns() {
253: return m_nRuns;
254: } // getRuns
255:
256: /**
257: * Sets the number of runs
258: * @param nRuns The number of runs to set
259: */
260: public void setRuns(int nRuns) {
261: m_nRuns = nRuns;
262: } // setRuns
263:
264: /**
265: * @return the Tabu List length
266: */
267: public int getTabuList() {
268: return m_nTabuList;
269: } // getTabuList
270:
271: /**
272: * Sets the Tabu List length.
273: * @param nTabuList The nTabuList to set
274: */
275: public void setTabuList(int nTabuList) {
276: m_nTabuList = nTabuList;
277: } // setTabuList
278:
279: /**
280: * Returns an enumeration describing the available options.
281: *
282: * @return an enumeration of all the available options.
283: */
284: public Enumeration listOptions() {
285: Vector newVector = new Vector(4);
286:
287: newVector.addElement(new Option("\tTabu list length", "L", 1,
288: "-L <integer>"));
289: newVector.addElement(new Option("\tNumber of runs", "U", 1,
290: "-U <integer>"));
291: newVector.addElement(new Option("\tMaximum number of parents",
292: "P", 1, "-P <nr of parents>"));
293: newVector.addElement(new Option(
294: "\tUse arc reversal operation.\n\t(default false)",
295: "R", 0, "-R"));
296:
297: Enumeration enu = super .listOptions();
298: while (enu.hasMoreElements()) {
299: newVector.addElement(enu.nextElement());
300: }
301: return newVector.elements();
302: } // listOptions
303:
304: /**
305: * Parses a given list of options. <p/>
306: *
307: <!-- options-start -->
308: * Valid options are: <p/>
309: *
310: * <pre> -L <integer>
311: * Tabu list length</pre>
312: *
313: * <pre> -U <integer>
314: * Number of runs</pre>
315: *
316: * <pre> -P <nr of parents>
317: * Maximum number of parents</pre>
318: *
319: * <pre> -R
320: * Use arc reversal operation.
321: * (default false)</pre>
322: *
323: * <pre> -P <nr of parents>
324: * Maximum number of parents</pre>
325: *
326: * <pre> -R
327: * Use arc reversal operation.
328: * (default false)</pre>
329: *
330: * <pre> -N
331: * Initial structure is empty (instead of Naive Bayes)</pre>
332: *
333: * <pre> -mbc
334: * Applies a Markov Blanket correction to the network structure,
335: * after a network structure is learned. This ensures that all
336: * nodes in the network are part of the Markov blanket of the
337: * classifier node.</pre>
338: *
339: * <pre> -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
340: * Score type (BAYES, BDeu, MDL, ENTROPY and AIC)</pre>
341: *
342: <!-- options-end -->
343: *
344: * @param options the list of options as an array of strings
345: * @throws Exception if an option is not supported
346: */
347: public void setOptions(String[] options) throws Exception {
348: String sTabuList = Utils.getOption('L', options);
349: if (sTabuList.length() != 0) {
350: setTabuList(Integer.parseInt(sTabuList));
351: }
352: String sRuns = Utils.getOption('U', options);
353: if (sRuns.length() != 0) {
354: setRuns(Integer.parseInt(sRuns));
355: }
356:
357: super .setOptions(options);
358: } // setOptions
359:
360: /**
361: * Gets the current settings of the search algorithm.
362: *
363: * @return an array of strings suitable for passing to setOptions
364: */
365: public String[] getOptions() {
366: String[] super Options = super .getOptions();
367: String[] options = new String[7 + super Options.length];
368: int current = 0;
369:
370: options[current++] = "-L";
371: options[current++] = "" + getTabuList();
372:
373: options[current++] = "-U";
374: options[current++] = "" + getRuns();
375:
376: // insert options from parent class
377: for (int iOption = 0; iOption < super Options.length; iOption++) {
378: options[current++] = super Options[iOption];
379: }
380:
381: // Fill up rest with empty strings, not nulls!
382: while (current < options.length) {
383: options[current++] = "";
384: }
385: return options;
386: } // getOptions
387:
388: /**
389: * This will return a string describing the classifier.
390: * @return The string.
391: */
392: public String globalInfo() {
393: return "This Bayes Network learning algorithm uses tabu search for finding a well scoring "
394: + "Bayes network structure. Tabu search is hill climbing till an optimum is reached. The "
395: + "following step is the least worst possible step. The last X steps are kept in a list and "
396: + "none of the steps in this so called tabu list is considered in taking the next step. "
397: + "The best network found in this traversal is returned.\n\n"
398: + "For more information see:\n\n"
399: + getTechnicalInformation().toString();
400: } // globalInfo
401:
402: /**
403: * @return a string to describe the Runs option.
404: */
405: public String runsTipText() {
406: return "Sets the number of steps to be performed.";
407: } // runsTipText
408:
409: /**
410: * @return a string to describe the TabuList option.
411: */
412: public String tabuListTipText() {
413: return "Sets the length of the tabu list.";
414: } // tabuListTipText
415:
416: } // TabuSearch
|