001: // yacyDHTAction.java
002: // -------------------------------------
003: // (C) by Michael Peter Christen; mc@anomic.de
004: // first published on http://www.anomic.de
005: // Frankfurt, Germany, 2005
006: //
007: // $LastChangedDate: 2008-01-25 11:44:27 +0000 (Fr, 25 Jan 2008) $
008: // $LastChangedRevision: 4399 $
009: // $LastChangedBy: orbiter $
010: //
011: // This program is free software; you can redistribute it and/or modify
012: // it under the terms of the GNU General Public License as published by
013: // the Free Software Foundation; either version 2 of the License, or
014: // (at your option) any later version.
015: //
016: // This program is distributed in the hope that it will be useful,
017: // but WITHOUT ANY WARRANTY; without even the implied warranty of
018: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: // GNU General Public License for more details.
020: //
021: // You should have received a copy of the GNU General Public License
022: // along with this program; if not, write to the Free Software
023: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: //
025: // Using this software in any meaning (reading, learning, copying, compiling,
026: // running) means that you agree that the Author(s) is (are) not responsible
027: // for cost, loss of data or any harm that may be caused directly or indirectly
028: // by usage of this softare or this documentation. The usage of this software
029: // is on your own risk. The installation and usage (starting/running) of this
030: // software may allow other people or application to access your computer and
031: // any attached devices and is highly dependent on the configuration of the
032: // software which must be done by the user of the software; the author(s) is
033: // (are) also not responsible for proper configuration and usage of the
034: // software, even if provoked by documentation provided together with
035: // the software.
036: //
037: // Any changes to this file according to the GPL as documented in the file
038: // gpl.txt aside this file in the shipment you received can be done to the
039: // lines that follows this copyright notice here, but changes must not be
040: // done inside the copyright notive above. A re-distribution must contain
041: // the intact and unchanged copyright notice.
042: // Contributions and changes to the program code must be marked as such.
043:
044: package de.anomic.yacy;
045:
046: import java.util.ArrayList;
047: import java.util.Iterator;
048: import java.util.TreeMap;
049: import java.util.TreeSet;
050:
051: import de.anomic.kelondro.kelondroBase64Order;
052: import de.anomic.kelondro.kelondroCloneableIterator;
053: import de.anomic.kelondro.kelondroCloneableMapIterator;
054: import de.anomic.kelondro.kelondroException;
055: import de.anomic.kelondro.kelondroMScoreCluster;
056: import de.anomic.kelondro.kelondroRotateIterator;
057: import de.anomic.server.logging.serverLog;
058:
059: public class yacyDHTAction implements yacyPeerAction {
060:
061: protected yacySeedDB seedDB;
062: protected kelondroMScoreCluster<String> seedCrawlReady;
063:
064: public yacyDHTAction(yacySeedDB seedDB) {
065: this .seedDB = seedDB;
066: this .seedCrawlReady = new kelondroMScoreCluster<String>();
067: // init crawl-ready table
068: try {
069: Iterator<yacySeed> en = seedDB.seedsConnected(true, false,
070: null, (float) 0.0);
071: yacySeed ys;
072: while (en.hasNext()) {
073: ys = (yacySeed) en.next();
074: if ((ys != null) && (ys.getVersion() >= ((float) 0.3)))
075: seedCrawlReady.setScore(ys.hash, yacyCore
076: .yacyTime());
077: }
078: } catch (IllegalArgumentException e) {
079: }
080: }
081:
082: public Iterator<yacySeed> getDHTSeeds(boolean up, String firstHash,
083: float minVersion) {
084: // enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning
085: return new seedDHTEnum(up, firstHash, minVersion);
086: }
087:
088: class seedDHTEnum implements Iterator<yacySeed> {
089:
090: Iterator<yacySeed> e1, e2;
091: boolean up;
092: int steps;
093: float minVersion;
094:
095: public seedDHTEnum(boolean up, String firstHash,
096: float minVersion) {
097: this .steps = seedDB.sizeConnected();
098: this .up = up;
099: this .minVersion = minVersion;
100: this .e1 = seedDB.seedsConnected(up, false, firstHash,
101: minVersion);
102: this .e2 = null;
103: }
104:
105: public boolean hasNext() {
106: return (steps > 0) && ((e2 == null) || (e2.hasNext()));
107: }
108:
109: public yacySeed next() {
110: if (steps == 0)
111: return null;
112: steps--;
113: if ((e1 != null) && (e1.hasNext())) {
114: yacySeed n = e1.next();
115: if (!(e1.hasNext())) {
116: e1 = null;
117: e2 = seedDB.seedsConnected(up, false, null,
118: minVersion);
119: }
120: return n;
121: } else {
122: if (e2 == null) {
123: e1 = null;
124: e2 = seedDB.seedsConnected(up, false, null,
125: minVersion);
126: }
127: return e2.next();
128: }
129: }
130:
131: public void remove() {
132: throw new UnsupportedOperationException();
133: }
134: }
135:
136: public Iterator<yacySeed> getProvidesRemoteCrawlURLs() {
137: return new providesRemoteCrawlURLsEnum();
138: }
139:
140: class providesRemoteCrawlURLsEnum implements Iterator<yacySeed> {
141:
142: Iterator<yacySeed> se;
143: yacySeed nextSeed;
144:
145: public providesRemoteCrawlURLsEnum() {
146: se = getDHTSeeds(true, null,
147: yacyVersion.YACY_POVIDES_REMOTECRAWL_LISTS);
148: nextSeed = nextInternal();
149: }
150:
151: public boolean hasNext() {
152: return nextSeed != null;
153: }
154:
155: private yacySeed nextInternal() {
156: yacySeed s;
157: try {
158: while (se.hasNext()) {
159: s = (yacySeed) se.next();
160: if (s == null)
161: return null;
162: if (s.getLong(yacySeed.RCOUNT, 0) > 0)
163: return s;
164: }
165: } catch (kelondroException e) {
166: System.out.println("DEBUG providesRemoteCrawlURLsEnum:"
167: + e.getMessage());
168: yacyCore.log.logSevere("database inconsistency ("
169: + e.getMessage() + "), re-set of db.");
170: seedDB.resetActiveTable();
171: return null;
172: }
173: return null;
174: }
175:
176: public yacySeed next() {
177: yacySeed next = nextSeed;
178: nextSeed = nextInternal();
179: return next;
180: }
181:
182: public void remove() {
183: throw new UnsupportedOperationException();
184: }
185:
186: }
187:
188: public Iterator<yacySeed> getAcceptRemoteIndexSeeds(String starthash) {
189: // returns an enumeration of yacySeed-Objects
190: // that have the AcceptRemoteIndex-Flag set
191: // the seeds are enumerated in the right order according DHT
192: return new acceptRemoteIndexSeedEnum(starthash);
193: }
194:
195: class acceptRemoteIndexSeedEnum implements Iterator<yacySeed> {
196:
197: Iterator<yacySeed> se;
198: yacySeed nextSeed;
199:
200: public acceptRemoteIndexSeedEnum(String starthash) {
201: se = getDHTSeeds(true, starthash,
202: yacyVersion.YACY_HANDLES_COLLECTION_INDEX);
203: nextSeed = nextInternal();
204: }
205:
206: public boolean hasNext() {
207: return nextSeed != null;
208: }
209:
210: private yacySeed nextInternal() {
211: yacySeed s;
212: try {
213: while (se.hasNext()) {
214: s = (yacySeed) se.next();
215: if (s == null)
216: return null;
217: if (s.getFlagAcceptRemoteIndex())
218: return s;
219: }
220: } catch (kelondroException e) {
221: System.out.println("DEBUG acceptRemoteIndexSeedEnum:"
222: + e.getMessage());
223: yacyCore.log.logSevere("database inconsistency ("
224: + e.getMessage() + "), re-set of db.");
225: seedDB.resetActiveTable();
226: return null;
227: }
228: return null;
229: }
230:
231: public yacySeed next() {
232: yacySeed next = nextSeed;
233: nextSeed = nextInternal();
234: return next;
235: }
236:
237: public void remove() {
238: throw new UnsupportedOperationException();
239: }
240:
241: }
242:
243: public Iterator<yacySeed> getAcceptRemoteCrawlSeeds(
244: String starthash, boolean available) {
245: return new acceptRemoteCrawlSeedEnum(starthash, available);
246: }
247:
248: class acceptRemoteCrawlSeedEnum implements Iterator<yacySeed> {
249:
250: Iterator<yacySeed> se;
251: yacySeed nextSeed;
252: boolean available;
253:
254: public acceptRemoteCrawlSeedEnum(String starthash,
255: boolean available) {
256: this .se = getDHTSeeds(true, starthash, (float) 0.0);
257: this .available = available;
258: nextSeed = nextInternal();
259: }
260:
261: public boolean hasNext() {
262: return nextSeed != null;
263: }
264:
265: private yacySeed nextInternal() {
266: yacySeed s;
267: while (se.hasNext()) {
268: s = (yacySeed) se.next();
269: if (s == null)
270: return null;
271: s.available = seedCrawlReady.getScore(s.hash);
272: if (available) {
273: if (seedCrawlReady.getScore(s.hash) < yacyCore
274: .yacyTime())
275: return s;
276: } else {
277: if (seedCrawlReady.getScore(s.hash) > yacyCore
278: .yacyTime())
279: return s;
280: }
281: }
282: return null;
283: }
284:
285: public yacySeed next() {
286: yacySeed next = nextSeed;
287: nextSeed = nextInternal();
288: return next;
289: }
290:
291: public void remove() {
292: throw new UnsupportedOperationException();
293: }
294:
295: }
296:
297: public synchronized yacySeed getGlobalCrawlSeed(String urlHash) {
298: Iterator<yacySeed> e = getAcceptRemoteCrawlSeeds(urlHash, true);
299: yacySeed seed;
300: if (e.hasNext())
301: seed = e.next();
302: else
303: seed = null;
304: e = null;
305: return seed;
306: }
307:
308: public synchronized yacySeed getPublicClusterCrawlSeed(
309: String urlHash, TreeMap<String, String> clusterhashes) {
310: // clusterhashes is a String(hash)/String(IP) - mapping
311: kelondroCloneableIterator<String> i = new kelondroRotateIterator<String>(
312: new kelondroCloneableMapIterator<String>(clusterhashes,
313: urlHash), null, clusterhashes.size());
314: String hash;
315: int count = clusterhashes.size(); // counter to ensure termination
316: while ((i.hasNext()) && (count-- > 0)) {
317: hash = i.next();
318: yacySeed seed = seedDB.getConnected(hash);
319: if (seed == null)
320: continue;
321: seed
322: .setAlternativeAddress((String) clusterhashes
323: .get(hash));
324: return seed;
325: }
326: return null;
327: }
328:
329: public void setCrawlTime(String seedHash, int newYacyTime) {
330: if (newYacyTime < yacyCore.yacyTime())
331: newYacyTime = yacyCore.yacyTime();
332: seedCrawlReady.setScore(seedHash, newYacyTime);
333: }
334:
335: public void setCrawlDelay(String seedHash, int newDelay) {
336: seedCrawlReady.setScore(seedHash, yacyCore.yacyTime()
337: + newDelay);
338: }
339:
340: public void processPeerArrival(yacySeed peer, boolean direct) {
341: if (peer.getVersion() >= ((float) 0.3)) {
342: if (!(seedCrawlReady.existsScore(peer.hash)))
343: seedCrawlReady.setScore(peer.hash, yacyCore.yacyTime());
344: } else {
345: seedCrawlReady.deleteScore(peer.hash);
346: }
347: }
348:
349: public void processPeerDeparture(yacySeed peer) {
350: seedCrawlReady.deleteScore(peer.hash);
351: }
352:
353: public void processPeerPing(yacySeed peer) {
354: }
355:
356: public static boolean shallBeOwnWord(String wordhash) {
357: if (yacyCore.seedDB == null)
358: return false;
359: if (yacyCore.seedDB.mySeed().isPotential())
360: return false;
361: final double distance = dhtDistance(
362: yacyCore.seedDB.mySeed().hash, wordhash);
363: final double max = 1.2 / yacyCore.seedDB.sizeConnected();
364: //System.out.println("Distance for " + wordhash + ": " + distance + "; max is " + max);
365: return (distance > 0) && (distance <= max);
366: }
367:
368: public static double dhtDistance(String peer, String word) {
369: // the dht distance is a positive value between 0 and 1
370: // if the distance is small, the word more probably belongs to the peer
371: double d = hashDistance(peer, word);
372: if (d > 0) {
373: return d; // case where the word is 'before' the peer
374: } else {
375: return ((double) 1) + d; // wrap-around case
376: }
377: }
378:
379: private static double hashDistance(String from, String to) {
380: // computes the distance between two hashes.
381: // the maximum distance between two hashes is 1, the minimum -1
382: // this can be used like "from - to"
383: // the result is positive if from > to
384: assert (from != null);
385: assert (to != null);
386: assert (from.length() == 12) : "from.length = " + from.length()
387: + ", from = " + from;
388: assert (to.length() == 12) : "to.length = " + to.length()
389: + ", to = " + to;
390: return ((double) (kelondroBase64Order.enhancedCoder
391: .cardinal(from.getBytes()) - kelondroBase64Order.enhancedCoder
392: .cardinal(to.getBytes())))
393: / ((double) Long.MAX_VALUE);
394: }
395:
396: public synchronized ArrayList<yacySeed> getDHTTargets(
397: serverLog log, int primaryPeerCount, int reservePeerCount,
398: String firstKey, String lastKey, double maxDist) {
399: // find a list of DHT-peers
400: assert firstKey != null;
401: assert lastKey != null;
402: assert yacyCore.seedDB != null;
403: assert yacyCore.seedDB.mySeed() != null;
404: assert yacyCore.seedDB.mySeed().hash != null;
405: /*
406: assert
407: !(kelondroBase64Order.enhancedCoder.cardinal(firstKey.getBytes()) < kelondroBase64Order.enhancedCoder.cardinal(yacyCore.seedDB.mySeed.hash.getBytes()) &&
408: kelondroBase64Order.enhancedCoder.cardinal(lastKey.getBytes()) > kelondroBase64Order.enhancedCoder.cardinal(yacyCore.seedDB.mySeed.hash.getBytes()));
409: */
410: ArrayList<yacySeed> seeds = new ArrayList<yacySeed>();
411: yacySeed seed;
412: //double ownDistance = Math.min(yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, firstKey), yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, lastKey));
413: //double maxDistance = Math.min(ownDistance, maxDist);
414:
415: double firstdist, lastdist;
416: Iterator<yacySeed> e = this .getAcceptRemoteIndexSeeds(lastKey);
417: TreeSet<String> doublecheck = new TreeSet<String>(
418: kelondroBase64Order.enhancedComparator);
419: int maxloop = Math.min(100, yacyCore.seedDB.sizeConnected()); // to ensure termination
420: if (log != null)
421: log.logInfo("Collecting DHT target peers for first_hash = "
422: + firstKey + ", last_hash = " + lastKey);
423: while ((e.hasNext())
424: && (seeds.size() < (primaryPeerCount + reservePeerCount))
425: && (maxloop-- > 0)) {
426: seed = (yacySeed) e.next();
427: if (seeds != null) {
428: firstdist = yacyDHTAction.dhtDistance(seed.hash,
429: firstKey);
430: lastdist = yacyDHTAction
431: .dhtDistance(seed.hash, lastKey);
432: if (lastdist > maxDist) {
433: if (log != null)
434: log
435: .logFine("Discarded too distant DHT target peer "
436: + seed.getName()
437: + ":"
438: + seed.hash
439: + ", distance2first = "
440: + firstdist
441: + ", distance2last = "
442: + lastdist);
443: } else if (doublecheck.contains(seed.hash)) {
444: if (log != null)
445: log.logFine("Discarded double DHT target peer "
446: + seed.getName() + ":" + seed.hash
447: + ", distance2first = " + firstdist
448: + ", distance2last = " + lastdist);
449: } else {
450: if (log != null)
451: log
452: .logInfo("Selected "
453: + ((seeds.size() < primaryPeerCount) ? "primary"
454: : "reserve")
455: + " DHT target peer "
456: + seed.getName() + ":"
457: + seed.hash
458: + ", distance2first = "
459: + firstdist
460: + ", distance2last = "
461: + lastdist);
462: seeds.add(seed);
463: doublecheck.add(seed.hash);
464: }
465: }
466: }
467: e = null; // finish enumeration
468:
469: return seeds;
470: }
471: }
|