001: // yacyPeerActions.java
002: // -------------------------------------
003: // (C) by Michael Peter Christen; mc@anomic.de
004: // first published on http://www.anomic.de
005: // Frankfurt, Germany, 2005
006: //
007: // $LastChangedDate: 2008-01-22 19:10:03 +0000 (Di, 22 Jan 2008) $
008: // $LastChangedRevision: 4358 $
009: // $LastChangedBy: orbiter $
010: //
011: // This program is free software; you can redistribute it and/or modify
012: // it under the terms of the GNU General Public License as published by
013: // the Free Software Foundation; either version 2 of the License, or
014: // (at your option) any later version.
015: //
016: // This program is distributed in the hope that it will be useful,
017: // but WITHOUT ANY WARRANTY; without even the implied warranty of
018: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: // GNU General Public License for more details.
020: //
021: // You should have received a copy of the GNU General Public License
022: // along with this program; if not, write to the Free Software
023: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: //
025: // Using this software in any meaning (reading, learning, copying, compiling,
026: // running) means that you agree that the Author(s) is (are) not responsible
027: // for cost, loss of data or any harm that may be caused directly or indirectly
028: // by usage of this softare or this documentation. The usage of this software
029: // is on your own risk. The installation and usage (starting/running) of this
030: // software may allow other people or application to access your computer and
031: // any attached devices and is highly dependent on the configuration of the
032: // software which must be done by the user of the software; the author(s) is
033: // (are) also not responsible for proper configuration and usage of the
034: // software, even if provoked by documentation provided together with
035: // the software.
036: //
037: // Any changes to this file according to the GPL as documented in the file
038: // gpl.txt aside this file in the shipment you received can be done to the
039: // lines that follows this copyright notice here, but changes must not be
040: // done inside the copyright notive above. A re-distribution must contain
041: // the intact and unchanged copyright notice.
042: // Contributions and changes to the program code must be marked as such.
043:
044: package de.anomic.yacy;
045:
046: import java.io.IOException;
047: import java.util.ArrayList;
048: import java.util.HashMap;
049: import java.util.HashSet;
050: import java.util.Iterator;
051:
052: import de.anomic.http.httpHeader;
053: import de.anomic.http.httpc;
054: import de.anomic.plasma.plasmaCrawlNURL;
055: import de.anomic.plasma.plasmaSwitchboard;
056: import de.anomic.server.serverCore;
057: import de.anomic.server.serverDate;
058: import de.anomic.tools.nxTools;
059:
060: public class yacyPeerActions {
061:
062: private yacySeedDB seedDB;
063: private plasmaSwitchboard sb;
064: private HashSet<yacyPeerAction> actions;
065: private HashMap<String, String> userAgents;
066: public long juniorConnects;
067: public long seniorConnects;
068: public long principalConnects;
069: public long disconnects;
070: private int bootstrapLoadTimeout;
071:
072: public yacyPeerActions(yacySeedDB seedDB,
073: plasmaSwitchboard switchboard) {
074: this .seedDB = seedDB;
075: this .sb = switchboard;
076: this .actions = new HashSet<yacyPeerAction>();
077: this .userAgents = new HashMap<String, String>();
078: this .juniorConnects = 0;
079: this .seniorConnects = 0;
080: this .principalConnects = 0;
081: this .disconnects = 0;
082: this .bootstrapLoadTimeout = (int) switchboard.getConfigLong(
083: "bootstrapLoadTimeout", 6000);
084: }
085:
086: public void deploy(yacyPeerAction action) {
087: actions.add(action);
088: }
089:
090: public void updateMySeed() {
091: if (sb.getConfig("peerName", "anomic").equals("anomic")) {
092: // generate new peer name
093: sb.setConfig("peerName", yacySeed.makeDefaultPeerName());
094: }
095: seedDB.mySeed().put(yacySeed.NAME,
096: sb.getConfig("peerName", "nameless"));
097: if ((serverCore.portForwardingEnabled)
098: && (serverCore.portForwarding != null)) {
099: seedDB.mySeed().put(
100: yacySeed.PORT,
101: Integer.toString(serverCore.portForwarding
102: .getPort()));
103: } else {
104: seedDB.mySeed().put(
105: yacySeed.PORT,
106: Integer.toString(serverCore.getPortNr(sb.getConfig(
107: "port", "8080"))));
108: }
109:
110: long uptime = (System.currentTimeMillis() - serverCore.startupTime) / 1000;
111: long uptimediff = uptime - sb.lastseedcheckuptime;
112: long indexedcdiff = sb.indexedPages - sb.lastindexedPages;
113: //double requestcdiff = sb.requestedQueries - sb.lastrequestedQueries;
114: if (uptimediff > 300 || uptimediff <= 0
115: || sb.lastseedcheckuptime == -1) {
116: sb.lastseedcheckuptime = uptime;
117: sb.lastindexedPages = sb.indexedPages;
118: sb.lastrequestedQueries = sb.requestedQueries;
119: }
120:
121: //the speed of indexing (pages/minute) of the peer
122: sb.totalPPM = (int) (sb.indexedPages * 60 / Math.max(uptime, 1));
123: seedDB.mySeed().put(
124: yacySeed.ISPEED,
125: Long.toString(Math.round(Math.max((float) indexedcdiff,
126: 0f)
127: * 60f / Math.max((float) uptimediff, 1f))));
128: sb.totalQPM = sb.requestedQueries * 60d
129: / Math.max((double) uptime, 1d);
130: seedDB
131: .mySeed()
132: .put(
133: yacySeed.RSPEED,
134: Double
135: .toString(sb.totalQPM /*Math.max((float) requestcdiff, 0f) * 60f / Math.max((float) uptimediff, 1f)*/));
136:
137: seedDB.mySeed()
138: .put(yacySeed.UPTIME, Long.toString(uptime / 60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
139: seedDB.mySeed().put(yacySeed.LCOUNT,
140: Integer.toString(sb.wordIndex.loadedURL.size())); // the number of links that the peer has stored (LURL's)
141: seedDB.mySeed().put(yacySeed.NCOUNT,
142: Integer.toString(sb.crawlQueues.noticeURL.size())); // the number of links that the peer has noticed, but not loaded (NURL's)
143: seedDB.mySeed().put(
144: yacySeed.RCOUNT,
145: Integer.toString(sb.crawlQueues.noticeURL
146: .stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT))); // the number of links that the peer provides for remote crawling (ZURL's)
147: seedDB.mySeed().put(yacySeed.ICOUNT,
148: Integer.toString(sb.wordIndex.size())); // the minimum number of words that the peer has indexed (as it says)
149: seedDB.mySeed().put(yacySeed.SCOUNT,
150: Integer.toString(seedDB.sizeConnected())); // the number of seeds that the peer has stored
151: seedDB
152: .mySeed()
153: .put(
154: yacySeed.CCOUNT,
155: Double
156: .toString(((int) ((seedDB
157: .sizeConnected()
158: + seedDB.sizeDisconnected() + seedDB
159: .sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0)); // the number of clients that the peer connects (as connects/hour)
160: seedDB.mySeed().put(yacySeed.VERSION,
161: sb.getConfig("version", ""));
162: if (seedDB.mySeed().get(yacySeed.PEERTYPE, "").equals(
163: yacySeed.PEERTYPE_PRINCIPAL)) {
164: // attach information about seed location
165: seedDB.mySeed().put("seedURL", sb.getConfig("seedURL", ""));
166: }
167: seedDB.mySeed().setFlagDirectConnect(true);
168: seedDB.mySeed().setLastSeenUTC();
169: seedDB.mySeed().put(yacySeed.UTC, serverDate.UTCDiffString());
170: seedDB.mySeed().setFlagAcceptRemoteCrawl(
171: sb.getConfig("crawlResponse", "").equals("true"));
172: seedDB.mySeed().setFlagAcceptRemoteIndex(
173: sb.getConfig("allowReceiveIndex", "").equals("true"));
174: //mySeed.setFlagAcceptRemoteIndex(true);
175: }
176:
177: public void saveMySeed() {
178: try {
179: seedDB.mySeed().save(sb.getOwnSeedFile());
180: } catch (IOException e) {
181: }
182: }
183:
184: public void loadSeedLists() {
185: // uses the superseed to initialize the database with known seeds
186:
187: yacySeed ys;
188: String seedListFileURL;
189: yacyURL url;
190: ArrayList<String> seedList;
191: Iterator<String> enu;
192: int lc;
193: int sc = seedDB.sizeConnected();
194: httpHeader header;
195:
196: yacyCore.log.logInfo("BOOTSTRAP: " + sc
197: + " seeds known from previous run");
198:
199: // - use the superseed to further fill up the seedDB
200: int ssc = 0, c = 0;
201: while (true) {
202: if (Thread.currentThread().isInterrupted())
203: break;
204: seedListFileURL = sb.getConfig(
205: "network.unit.bootstrap.seedlist" + c, "");
206: if (seedListFileURL.length() == 0)
207: break;
208: c++;
209: if (seedListFileURL.startsWith("http://")
210: || seedListFileURL.startsWith("https://")) {
211: // load the seed list
212: try {
213: httpHeader reqHeader = new httpHeader();
214: reqHeader.put(httpHeader.PRAGMA, "no-cache");
215: reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache");
216:
217: url = new yacyURL(seedListFileURL, null);
218: long start = System.currentTimeMillis();
219: header = httpc.whead(url, url.getHost(),
220: this .bootstrapLoadTimeout, null, null,
221: this .sb.remoteProxyConfig, reqHeader);
222: long loadtime = System.currentTimeMillis() - start;
223: if (header == null) {
224: if (loadtime > this .bootstrapLoadTimeout) {
225: yacyCore.log
226: .logWarning("BOOTSTRAP: seed-list URL "
227: + seedListFileURL
228: + " not available, time-out after "
229: + loadtime
230: + " milliseconds");
231: } else {
232: yacyCore.log
233: .logWarning("BOOTSTRAP: seed-list URL "
234: + seedListFileURL
235: + " not available, no content");
236: }
237: } else if (header.lastModified() == null) {
238: yacyCore.log
239: .logWarning("BOOTSTRAP: seed-list URL "
240: + seedListFileURL
241: + " not usable, last-modified is missing");
242: } else if ((header.age() > 86400000) && (ssc > 0)) {
243: yacyCore.log
244: .logInfo("BOOTSTRAP: seed-list URL "
245: + seedListFileURL
246: + " too old ("
247: + (header.age() / 86400000)
248: + " days)");
249: } else {
250: ssc++;
251: seedList = nxTools.strings(httpc.wget(url, url
252: .getHost(), this .bootstrapLoadTimeout,
253: null, null, this .sb.remoteProxyConfig,
254: reqHeader, null), "UTF-8");
255: enu = seedList.iterator();
256: lc = 0;
257: while (enu.hasNext()) {
258: ys = yacySeed.genRemoteSeed((String) enu
259: .next(), null, true);
260: if ((ys != null)
261: && (ys.isProper() == null)
262: && ((!seedDB.mySeedIsDefined()) || (seedDB
263: .mySeed().hash != ys.hash))) {
264: if (connectPeer(ys, false))
265: lc++;
266: //seedDB.writeMap(ys.hash, ys.getMap(), "init");
267: //System.out.println("BOOTSTRAP: received peer " + ys.get(yacySeed.NAME, "anonymous") + "/" + ys.getAddress());
268: //lc++;
269: }
270: }
271: yacyCore.log.logInfo("BOOTSTRAP: " + lc
272: + " seeds from seed-list URL "
273: + seedListFileURL + ", AGE="
274: + (header.age() / 3600000) + "h");
275: }
276:
277: } catch (IOException e) {
278: // this is when wget fails, commonly because of timeout
279: yacyCore.log.logWarning(
280: "BOOTSTRAP: failed (1) to load seeds from seed-list URL "
281: + seedListFileURL + ": "
282: + e.getMessage(), e);
283: } catch (Exception e) {
284: // this is when wget fails; may be because of missing internet connection
285: yacyCore.log.logSevere(
286: "BOOTSTRAP: failed (2) to load seeds from seed-list URL "
287: + seedListFileURL + ": "
288: + e.getMessage(), e);
289: }
290: }
291: }
292: yacyCore.log.logInfo("BOOTSTRAP: "
293: + (seedDB.sizeConnected() - sc)
294: + " new seeds while bootstraping.");
295: }
296:
297: private synchronized boolean connectPeer(yacySeed seed,
298: boolean direct) {
299: // store a remote peer's seed
300: // returns true if the peer is new and previously unknown
301: if (seed == null) {
302: yacyCore.log.logSevere("connect: WRONG seed (NULL)");
303: return false;
304: }
305: final String error = seed.isProper();
306: if (error != null) {
307: yacyCore.log.logSevere("connect: WRONG seed ("
308: + seed.getName() + "/" + seed.hash + "): " + error);
309: return false;
310: }
311: if ((this .seedDB.mySeedIsDefined())
312: && (seed.hash.equals(this .seedDB.mySeed().hash))) {
313: yacyCore.log.logInfo("connect: SELF reference "
314: + seed.getPublicAddress());
315: return false;
316: }
317: final String peerType = seed.get(yacySeed.PEERTYPE,
318: yacySeed.PEERTYPE_VIRGIN);
319:
320: if ((peerType.equals(yacySeed.PEERTYPE_VIRGIN))
321: || (peerType.equals(yacySeed.PEERTYPE_JUNIOR))) {
322: // reject unqualified seeds
323: yacyCore.log.logFine("connect: rejecting NOT QUALIFIED "
324: + peerType + " seed " + seed.getName());
325: return false;
326: }
327:
328: final yacySeed doubleSeed = this .seedDB.lookupByIP(seed
329: .getInetAddress(), true, false, false);
330: if ((doubleSeed != null)
331: && (doubleSeed.getPort() == seed.getPort())
332: && (!(doubleSeed.hash.equals(seed.hash)))) {
333: // a user frauds with his peer different peer hashes
334: yacyCore.log
335: .logFine("connect: rejecting FRAUD (double hashes "
336: + doubleSeed.hash + "/" + seed.hash
337: + " on same port " + seed.getPort()
338: + ") peer " + seed.getName());
339: return false;
340: }
341:
342: if (seed.get(yacySeed.LASTSEEN, "").length() != 14) {
343: // hack for peers that do not have a LastSeen date
344: seed.setLastSeenUTC();
345: yacyCore.log.logFine("connect: reset wrong date ("
346: + seed.getName() + "/" + seed.hash + ")");
347: }
348:
349: // connection time
350: final long nowUTC0Time = System.currentTimeMillis(); // is better to have this value in a variable for debugging
351: long ctimeUTC0 = seed.getLastSeenUTC();
352:
353: if (ctimeUTC0 > nowUTC0Time) {
354: // the peer is future-dated, correct it
355: seed.setLastSeenUTC();
356: ctimeUTC0 = nowUTC0Time;
357: assert (seed.getLastSeenUTC() - ctimeUTC0 < 100);
358: }
359: if (Math.abs(nowUTC0Time - ctimeUTC0) > 60 * 60 * 24 * 1000) {
360: // the new connection is out-of-age, we reject the connection
361: yacyCore.log.logFine("connect: rejecting out-dated peer '"
362: + seed.getName()
363: + "' from "
364: + seed.getPublicAddress()
365: + "; nowUTC0="
366: + nowUTC0Time
367: + ", seedUTC0="
368: + ctimeUTC0
369: + ", TimeDiff="
370: + serverDate.formatInterval(Math.abs(nowUTC0Time
371: - ctimeUTC0)));
372: return false;
373: }
374:
375: // disconnection time
376: long dtimeUTC0;
377: final yacySeed disconnectedSeed = seedDB
378: .getDisconnected(seed.hash);
379: if (disconnectedSeed == null) {
380: dtimeUTC0 = 0; // never disconnected: virtually disconnected maximum time ago
381: } else {
382: dtimeUTC0 = disconnectedSeed.getLong("dct", 0);
383: }
384:
385: if (direct) {
386: // remember the moment
387: // Date applies the local UTC offset, which is wrong
388: // we correct that by subtracting the local offset and adding
389: // the remote offset.
390: seed.setLastSeenUTC();
391: seed.setFlagDirectConnect(true);
392: } else {
393: // set connection flag
394: if (Math.abs(nowUTC0Time - ctimeUTC0) > 120000)
395: seed.setFlagDirectConnect(false); // 2 minutes
396: }
397:
398: // update latest version number
399: if (seed.getVersion() > yacyVersion.latestRelease)
400: yacyVersion.latestRelease = seed.getVersion();
401:
402: // prepare to update
403: if (disconnectedSeed != null) {
404: // if the indirect connect aims to announce a peer that we know
405: // has been disconnected then we compare the dates:
406: // if the new peer has a LastSeen date, and that date is before
407: // the disconnection date, then we ignore the new peer
408: if (!direct) {
409: if (ctimeUTC0 < dtimeUTC0) {
410: // the disconnection was later, we reject the connection
411: yacyCore.log
412: .logFine("connect: rejecting disconnected peer '"
413: + seed.getName()
414: + "' from "
415: + seed.getPublicAddress());
416: return false;
417: }
418: }
419:
420: // this is a return of a lost peer
421: yacyCore.log.logFine("connect: returned KNOWN " + peerType
422: + " peer '" + seed.getName() + "' from "
423: + seed.getPublicAddress());
424: this .seedDB.addConnected(seed);
425: return true;
426: } else {
427: final yacySeed connectedSeed = this .seedDB
428: .getConnected(seed.hash);
429: if (connectedSeed != null) {
430: // the seed is known: this is an update
431: try {
432: // if the old LastSeen date is later then the other
433: // info, then we reject the info
434: if ((ctimeUTC0 < (connectedSeed.getLastSeenUTC()))
435: && (!direct)) {
436: yacyCore.log
437: .logFine("connect: rejecting old info about peer '"
438: + seed.getName() + "'");
439: return false;
440: }
441:
442: if (connectedSeed.getName() != seed.getName()) {
443: // TODO: update seed name lookup cache
444: }
445: } catch (NumberFormatException e) {
446: yacyCore.log
447: .logFine("connect: rejecting wrong peer '"
448: + seed.getName() + "' from "
449: + seed.getPublicAddress()
450: + ". Cause: " + e.getMessage());
451: return false;
452: }
453: yacyCore.log.logFine("connect: updated KNOWN "
454: + ((direct) ? "direct " : "") + peerType
455: + " peer '" + seed.getName() + "' from "
456: + seed.getPublicAddress());
457: seedDB.addConnected(seed);
458: return true;
459: } else {
460: // the seed is new
461: if ((seedDB.mySeedIsDefined())
462: && (seed.get(yacySeed.IP, "127.0.0.1")
463: .equals(this .seedDB.mySeed().get(
464: yacySeed.IP, "127.0.0.1")))) {
465: // seed from the same IP as the calling client: can be
466: // the case if there runs another one over a NAT
467: yacyCore.log
468: .logFine("connect: saved NEW seed (myself IP) "
469: + seed.getPublicAddress());
470: } else {
471: // completely new seed
472: yacyCore.log.logFine("connect: saved NEW "
473: + peerType + " peer '" + seed.getName()
474: + "' from " + seed.getPublicAddress());
475: }
476: if (peerType.equals(yacySeed.PEERTYPE_SENIOR))
477: this .seniorConnects++; // update statistics
478: if (peerType.equals(yacySeed.PEERTYPE_PRINCIPAL))
479: this .principalConnects++; // update statistics
480: this .seedDB.addConnected(seed);
481: return true;
482: }
483: }
484: }
485:
486: private final void disconnectPeer(yacySeed seed, String cause) {
487: // we do this if we did not get contact with the other peer
488: yacyCore.log.logFine("connect: no contact to a "
489: + seed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN)
490: + " peer '" + seed.getName() + "' at "
491: + seed.getPublicAddress() + ". Cause: " + cause);
492: synchronized (seedDB) {
493: if (!seedDB.hasDisconnected(seed.hash)) {
494: disconnects++;
495: }
496: seed.put("dct", Long.toString(System.currentTimeMillis()));
497: seedDB.addDisconnected(seed); // update info
498: }
499: }
500:
501: public boolean peerArrival(yacySeed peer, boolean direct) {
502: if (peer == null)
503: return false;
504: boolean res = connectPeer(peer, direct);
505: // perform all actions if peer is effective new
506: if (res) {
507: Iterator<yacyPeerAction> i = actions.iterator();
508: while (i.hasNext())
509: i.next().processPeerArrival(peer, direct);
510: }
511: return res;
512: }
513:
514: public void peerDeparture(yacySeed peer, String cause) {
515: if (peer == null)
516: return;
517: disconnectPeer(peer, cause);
518: // perform all actions
519: Iterator<yacyPeerAction> i = actions.iterator();
520: while (i.hasNext())
521: i.next().processPeerDeparture(peer);
522: }
523:
524: public void peerPing(yacySeed peer) {
525: if (peer == null)
526: return;
527: // this is called only if the peer has junior status
528: seedDB.addPotential(peer);
529: // perform all actions
530: Iterator<yacyPeerAction> i = actions.iterator();
531: while (i.hasNext())
532: i.next().processPeerPing(peer);
533: }
534:
535: public void setUserAgent(String IP, String userAgent) {
536: userAgents.put(IP, userAgent);
537: }
538:
539: public String getUserAgent(String IP) {
540: String userAgent = (String) userAgents.get(IP);
541: return (userAgent == null) ? "" : userAgent;
542: }
543: }
|