0001: // yacySeedDB.java
0002: // -------------------------------------
0003: // (C) by Michael Peter Christen; mc@anomic.de
0004: // first published on http://www.anomic.de
0005: // Frankfurt, Germany, 2004, 2005
0006: //
0007: // $LastChangedDate: 2008-02-03 18:42:25 +0000 (So, 03 Feb 2008) $
0008: // $LastChangedRevision: 4437 $
0009: // $LastChangedBy: orbiter $
0010: //
0011: // This program is free software; you can redistribute it and/or modify
0012: // it under the terms of the GNU General Public License as published by
0013: // the Free Software Foundation; either version 2 of the License, or
0014: // (at your option) any later version.
0015: //
0016: // This program is distributed in the hope that it will be useful,
0017: // but WITHOUT ANY WARRANTY; without even the implied warranty of
0018: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0019: // GNU General Public License for more details.
0020: //
0021: // You should have received a copy of the GNU General Public License
0022: // along with this program; if not, write to the Free Software
0023: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0024: //
0025: // Using this software in any meaning (reading, learning, copying, compiling,
0026: // running) means that you agree that the Author(s) is (are) not responsible
0027: // for cost, loss of data or any harm that may be caused directly or indirectly
0028: // by usage of this softare or this documentation. The usage of this software
0029: // is on your own risk. The installation and usage (starting/running) of this
0030: // software may allow other people or application to access your computer and
0031: // any attached devices and is highly dependent on the configuration of the
0032: // software which must be done by the user of the software; the author(s) is
0033: // (are) also not responsible for proper configuration and usage of the
0034: // software, even if provoked by documentation provided together with
0035: // the software.
0036: //
0037: // Any changes to this file according to the GPL as documented in the file
0038: // gpl.txt aside this file in the shipment you received can be done to the
0039: // lines that follows this copyright notice here, but changes must not be
0040: // done inside the copyright notive above. A re-distribution must contain
0041: // the intact and unchanged copyright notice.
0042: // Contributions and changes to the program code must be marked as such.
0043:
0044: package de.anomic.yacy;
0045:
0046: import java.io.BufferedWriter;
0047: import java.io.File;
0048: import java.io.FileWriter;
0049: import java.io.IOException;
0050: import java.io.PrintWriter;
0051: import java.lang.ref.SoftReference;
0052: import java.lang.reflect.Method;
0053: import java.net.InetAddress;
0054: import java.net.UnknownHostException;
0055: import java.util.ArrayList;
0056: import java.util.HashMap;
0057: import java.util.HashSet;
0058: import java.util.Hashtable;
0059: import java.util.Iterator;
0060: import java.util.Map;
0061: import java.util.TreeMap;
0062:
0063: import de.anomic.http.httpHeader;
0064: import de.anomic.http.httpc;
0065: import de.anomic.http.httpd;
0066: import de.anomic.kelondro.kelondroBase64Order;
0067: import de.anomic.kelondro.kelondroDyn;
0068: import de.anomic.kelondro.kelondroException;
0069: import de.anomic.kelondro.kelondroMScoreCluster;
0070: import de.anomic.kelondro.kelondroMapObjects;
0071: import de.anomic.plasma.plasmaHTCache;
0072: import de.anomic.plasma.plasmaSwitchboard;
0073: import de.anomic.server.serverCore;
0074: import de.anomic.server.serverDate;
0075: import de.anomic.server.serverDomains;
0076: import de.anomic.server.serverFileUtils;
0077: import de.anomic.server.serverSwitch;
0078: import de.anomic.server.logging.serverLog;
0079: import de.anomic.tools.nxTools;
0080:
0081: public final class yacySeedDB {
0082:
0083: // global statics
0084:
0085: /**
0086: * this is the lenght(12) of the hash key that is used:<br>
0087: * - for seed hashes (this Object)<br>
0088: * - for word hashes (plasmaIndexEntry.wordHashLength)<br>
0089: * - for L-URL hashes (plasmaLURL.urlHashLength)<br><br>
0090: * these hashes all shall be generated by base64.enhancedCoder
0091: */
0092: public static final int commonHashLength = 12;
0093: public static final int dhtActivityMagic = 32;
0094:
0095: public static final String[] sortFields = new String[] {
0096: yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.UPTIME,
0097: yacySeed.VERSION, yacySeed.LASTSEEN };
0098: public static final String[] longaccFields = new String[] {
0099: yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.ISPEED };
0100: public static final String[] doubleaccFields = new String[] { yacySeed.RSPEED };
0101:
0102: // class objects
0103: protected File seedActiveDBFile, seedPassiveDBFile,
0104: seedPotentialDBFile;
0105:
0106: protected kelondroMapObjects seedActiveDB, seedPassiveDB,
0107: seedPotentialDB;
0108: private long preloadTime;
0109:
0110: private final plasmaSwitchboard sb;
0111: private yacySeed mySeed; // my own seed
0112:
0113: private final Hashtable<String, yacySeed> nameLookupCache;
0114: private final Hashtable<InetAddress, SoftReference<yacySeed>> ipLookupCache;
0115:
/**
 * Creates the seed database: remembers the table files, opens the active,
 * passive and potential seed tables, creates the (empty) lookup caches and
 * finally removes the own seed from the tables if it is contained there.
 *
 * @param sb                  the switchboard this database belongs to
 * @param seedActiveDBFile    file of the active (connected) seed table
 * @param seedPassiveDBFile   file of the passive (disconnected) seed table
 * @param seedPotentialDBFile file of the potential seed table
 * @param preloadTime         preload time budget used when opening the tables
 */
public yacySeedDB(plasmaSwitchboard sb, File seedActiveDBFile,
        File seedPassiveDBFile, File seedPotentialDBFile,
        long preloadTime) {

    this.sb = sb;
    this.preloadTime = preloadTime; // read by openSeedTable(), so set it first
    this.mySeed = null;             // my own seed, created lazily
    this.seedActiveDBFile = seedActiveDBFile;
    this.seedPassiveDBFile = seedPassiveDBFile;
    this.seedPotentialDBFile = seedPotentialDBFile;

    // set up seed database
    this.seedActiveDB = openSeedTable(seedActiveDBFile);
    this.seedPassiveDB = openSeedTable(seedPassiveDBFile);
    this.seedPotentialDB = openSeedTable(seedPotentialDBFile);

    // start our virtual DNS service for yacy peers with empty cache
    this.nameLookupCache = new Hashtable<String, yacySeed>();

    // cache for reverse name lookup
    this.ipLookupCache = new Hashtable<InetAddress, SoftReference<yacySeed>>();

    // check if we are in the seedCaches: this can happen if someone else published our seed
    removeMySeed();
}
0141:
0142: private synchronized void initMySeed() {
0143: if (this .mySeed != null)
0144: return;
0145:
0146: // create or init own seed
0147: File myOwnSeedFile = sb.getOwnSeedFile();
0148: if (myOwnSeedFile.length() > 0)
0149: try {
0150: // load existing identity
0151: mySeed = yacySeed.load(myOwnSeedFile);
0152: } catch (IOException e) {
0153: // create new identity
0154: mySeed = yacySeed.genLocalSeed(sb);
0155: try {
0156: mySeed.save(myOwnSeedFile);
0157: } catch (IOException ee) {
0158: ee.printStackTrace();
0159: System.exit(-1);
0160: }
0161: }
0162: else {
0163: // create new identity
0164: mySeed = yacySeed.genLocalSeed(sb);
0165: try {
0166: mySeed.save(myOwnSeedFile);
0167: } catch (IOException ee) {
0168: ee.printStackTrace();
0169: System.exit(-1);
0170: }
0171: }
0172:
0173: if (sb.getConfig("portForwardingEnabled", "false")
0174: .equalsIgnoreCase("true")) {
0175: mySeed.put(yacySeed.PORT, sb.getConfig(
0176: "portForwardingPort", "8080"));
0177: mySeed.put(yacySeed.IP, sb.getConfig("portForwardingHost",
0178: "localhost"));
0179: } else {
0180: mySeed.put(yacySeed.IP, ""); // we delete the old information to see what we have now
0181: mySeed.put(yacySeed.PORT, Integer.toString(serverCore
0182: .getPortNr(sb.getConfig("port", "8080")))); // set my seed's correct port number
0183: }
0184: mySeed.put(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN); // markup startup condition
0185:
0186: }
0187:
0188: public boolean mySeedIsDefined() {
0189: return this .mySeed != null;
0190: }
0191:
0192: public yacySeed mySeed() {
0193: if (this .mySeed == null) {
0194: if (this .sizeConnected() == 0)
0195: try {
0196: Thread.sleep(5000);
0197: } catch (InterruptedException e) {
0198: } // wait for init
0199: initMySeed();
0200: }
0201: return this .mySeed;
0202: }
0203:
0204: public synchronized void removeMySeed() {
0205: if ((seedActiveDB.size() == 0) && (seedPassiveDB.size() == 0)
0206: && (seedPotentialDB.size() == 0))
0207: return; // avoid that the own seed is initialized too early
0208: if (this .mySeed == null)
0209: initMySeed();
0210: try {
0211: seedActiveDB.remove(mySeed.hash);
0212: seedPassiveDB.remove(mySeed.hash);
0213: seedPotentialDB.remove(mySeed.hash);
0214: } catch (IOException e) {
0215: }
0216: }
0217:
0218: public boolean noDHTActivity() {
0219: // for small networks, we don't perform DHT transmissions, because it is possible to search over all peers
0220: return this .sizeConnected() <= dhtActivityMagic;
0221: }
0222:
0223: @SuppressWarnings("unchecked")
0224: private synchronized kelondroMapObjects openSeedTable(
0225: File seedDBFile) {
0226: final boolean usetree = false;
0227: new File(seedDBFile.getParent()).mkdirs();
0228: Class[] args;
0229: try {
0230: args = new Class[] { "".getClass(),
0231: Class.forName("java.util.Map") };
0232: } catch (ClassNotFoundException e2) {
0233: e2.printStackTrace();
0234: args = null;
0235: }
0236: Method initializeHandlerMethod;
0237: try {
0238: initializeHandlerMethod = this .getClass().getMethod(
0239: "initializeHandler", args);
0240: } catch (SecurityException e1) {
0241: e1.printStackTrace();
0242: initializeHandlerMethod = null;
0243: } catch (NoSuchMethodException e1) {
0244: e1.printStackTrace();
0245: initializeHandlerMethod = null;
0246: }
0247: try {
0248: return new kelondroMapObjects(new kelondroDyn(seedDBFile,
0249: true, true, preloadTime / 3, commonHashLength, 480,
0250: '#', kelondroBase64Order.enhancedCoder, usetree,
0251: false, true), 500, sortFields, longaccFields,
0252: doubleaccFields, initializeHandlerMethod, this );
0253: } catch (Exception e) {
0254: // try again
0255: kelondroDyn.delete(seedDBFile, usetree);
0256: return new kelondroMapObjects(new kelondroDyn(seedDBFile,
0257: true, true, preloadTime / 3, commonHashLength, 480,
0258: '#', kelondroBase64Order.enhancedCoder, usetree,
0259: false, true), 500, sortFields, longaccFields,
0260: doubleaccFields, initializeHandlerMethod, this );
0261: }
0262: }
0263:
0264: protected synchronized kelondroMapObjects resetSeedTable(
0265: kelondroMapObjects seedDB, File seedDBFile) {
0266: // this is an emergency function that should only be used if any problem with the
0267: // seed.db is detected
0268: yacyCore.log.logFine("seed-db " + seedDBFile.toString()
0269: + " reset (on-the-fly)");
0270: seedDB.close();
0271: seedDBFile.delete();
0272: // create new seed database
0273: seedDB = openSeedTable(seedDBFile);
0274: return seedDB;
0275: }
0276:
0277: public synchronized void resetActiveTable() {
0278: seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile);
0279: }
0280:
0281: public synchronized void resetPassiveTable() {
0282: seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile);
0283: }
0284:
0285: public synchronized void resetPotentialTable() {
0286: seedPotentialDB = resetSeedTable(seedPotentialDB,
0287: seedPotentialDBFile);
0288: }
0289:
0290: public void close() {
0291: if (seedActiveDB != null)
0292: seedActiveDB.close();
0293: if (seedPassiveDB != null)
0294: seedPassiveDB.close();
0295: if (seedPotentialDB != null)
0296: seedPotentialDB.close();
0297: }
0298:
0299: @SuppressWarnings("unchecked")
0300: public void initializeHandler(String mapname, Map map) {
0301: // this is used to set up a lastSeen lookup table
0302:
0303: }
0304:
0305: public Iterator<yacySeed> seedsSortedConnected(boolean up,
0306: String field) {
0307: // enumerates seed-type objects: all seeds sequentially ordered by field
0308: return new seedEnum(up, field, seedActiveDB);
0309: }
0310:
0311: public Iterator<yacySeed> seedsSortedDisconnected(boolean up,
0312: String field) {
0313: // enumerates seed-type objects: all seeds sequentially ordered by field
0314: return new seedEnum(up, field, seedPassiveDB);
0315: }
0316:
0317: public Iterator<yacySeed> seedsSortedPotential(boolean up,
0318: String field) {
0319: // enumerates seed-type objects: all seeds sequentially ordered by field
0320: return new seedEnum(up, field, seedPotentialDB);
0321: }
0322:
0323: public TreeMap<String, String> /* peer-b64-hashes/ipport */clusterHashes(
0324: String clusterdefinition) {
0325: // collects seeds according to cluster definition string, which consists of
0326: // comma-separated .yacy or .yacyh-domains
0327: // the domain may be extended by an alternative address specification of the form
0328: // <ip> or <ip>:<port>. The port must be identical to the port specified in the peer seed,
0329: // therefore it is optional. The address specification is separated by a '='; the complete
0330: // address has therefore the form
0331: // address ::= (<peername>'.yacy'|<peerhexhash>'.yacyh'){'='<ip>{':'<port}}
0332: // clusterdef ::= {address}{','address}*
0333: String[] addresses = (clusterdefinition.length() == 0) ? new String[0]
0334: : clusterdefinition.split(",");
0335: TreeMap<String, String> clustermap = new TreeMap<String, String>(
0336: kelondroBase64Order.enhancedComparator);
0337: yacySeed seed;
0338: String hash, yacydom, ipport;
0339: int p;
0340: for (int i = 0; i < addresses.length; i++) {
0341: p = addresses[i].indexOf('=');
0342: if (p >= 0) {
0343: yacydom = addresses[i].substring(0, p);
0344: ipport = addresses[i].substring(p + 1);
0345: } else {
0346: yacydom = addresses[i];
0347: ipport = null;
0348: }
0349: if (yacydom.endsWith(".yacyh")) {
0350: // find a peer with its hexhash
0351: hash = yacySeed.hexHash2b64Hash(yacydom.substring(0,
0352: yacydom.length() - 6));
0353: seed = get(hash);
0354: if (seed == null) {
0355: yacyCore.log.logWarning("cluster peer '" + yacydom
0356: + "' was not found.");
0357: } else {
0358: clustermap.put(hash, ipport);
0359: }
0360: } else if (yacydom.endsWith(".yacy")) {
0361: // find a peer with its name
0362: seed = lookupByName(yacydom.substring(0, yacydom
0363: .length() - 5));
0364: if (seed == null) {
0365: yacyCore.log.logWarning("cluster peer '" + yacydom
0366: + "' was not found.");
0367: } else {
0368: clustermap.put(seed.hash, ipport);
0369: }
0370: } else {
0371: yacyCore.log
0372: .logWarning("cluster peer '"
0373: + addresses[i]
0374: + "' has wrong syntax. the name must end with .yacy or .yacyh");
0375: }
0376: }
0377: return clustermap;
0378: }
0379:
0380: public Iterator<yacySeed> seedsConnected(boolean up, boolean rot,
0381: String firstHash, float minVersion) {
0382: // enumerates seed-type objects: all seeds sequentially without order
0383: return new seedEnum(up, rot, (firstHash == null) ? null
0384: : firstHash.getBytes(), null, seedActiveDB, minVersion);
0385: }
0386:
0387: public Iterator<yacySeed> seedsDisconnected(boolean up,
0388: boolean rot, String firstHash, float minVersion) {
0389: // enumerates seed-type objects: all seeds sequentially without order
0390: return new seedEnum(up, rot, (firstHash == null) ? null
0391: : firstHash.getBytes(), null, seedPassiveDB, minVersion);
0392: }
0393:
0394: public Iterator<yacySeed> seedsPotential(boolean up, boolean rot,
0395: String firstHash, float minVersion) {
0396: // enumerates seed-type objects: all seeds sequentially without order
0397: return new seedEnum(up, rot, (firstHash == null) ? null
0398: : firstHash.getBytes(), null, seedPotentialDB,
0399: minVersion);
0400: }
0401:
0402: public yacySeed anySeedVersion(float minVersion) {
0403: // return just any seed that has a specific minimum version number
0404: Iterator<yacySeed> e = seedsConnected(true, true, yacySeed
0405: .randomHash(), minVersion);
0406: return (yacySeed) e.next();
0407: }
0408:
0409: public HashMap<String, yacySeed> seedsByAge(boolean up, int count) {
0410: // returns a peerhash/yacySeed relation
0411: // to get most recent peers, set up = true; for oldest peers, set up = false
0412:
0413: if (count > sizeConnected())
0414: count = sizeConnected();
0415:
0416: // fill a score object
0417: kelondroMScoreCluster<String> seedScore = new kelondroMScoreCluster<String>();
0418: yacySeed ys;
0419: long absage;
0420: Iterator<yacySeed> s = seedsConnected(true, false, null,
0421: (float) 0.0);
0422: int searchcount = 1000;
0423: if (searchcount > sizeConnected())
0424: searchcount = sizeConnected();
0425: try {
0426: while ((s.hasNext()) && (searchcount-- > 0)) {
0427: ys = s.next();
0428: if ((ys != null)
0429: && (ys.get(yacySeed.LASTSEEN, "").length() > 10))
0430: try {
0431: absage = Math.abs(System.currentTimeMillis()
0432: + serverDate.dayMillis
0433: - ys.getLastSeenUTC());
0434: seedScore.addScore(ys.hash, (int) absage); // the higher absage, the older is the peer
0435: } catch (Exception e) {
0436: }
0437: }
0438:
0439: // result is now in the score object; create a result vector
0440: HashMap<String, yacySeed> result = new HashMap<String, yacySeed>();
0441: Iterator<String> it = seedScore.scores(up);
0442: int c = 0;
0443: while ((c < count) && (it.hasNext())) {
0444: c++;
0445: ys = getConnected((String) it.next());
0446: if ((ys != null) && (ys.hash != null))
0447: result.put(ys.hash, ys);
0448: }
0449: return result;
0450: } catch (kelondroException e) {
0451: seedActiveDB = resetSeedTable(seedActiveDB,
0452: seedActiveDBFile);
0453: yacyCore.log.logFine(
0454: "Internal Error at yacySeedDB.seedsByAge: "
0455: + e.getMessage(), e);
0456: return null;
0457: }
0458: }
0459:
0460: public int sizeConnected() {
0461: return seedActiveDB.size();
0462: /*
0463: Enumeration e = seedsConnected(true, false, null);
0464: int c = 0; while (e.hasMoreElements()) {c++; e.nextElement();}
0465: return c;
0466: */
0467: }
0468:
0469: public int sizeDisconnected() {
0470: return seedPassiveDB.size();
0471: /*
0472: Enumeration e = seedsDisconnected(true, false, null);
0473: int c = 0; while (e.hasMoreElements()) {c++; e.nextElement();}
0474: return c;
0475: */
0476: }
0477:
0478: public int sizePotential() {
0479: return seedPotentialDB.size();
0480: /*
0481: Enumeration e = seedsPotential(true, false, null);
0482: int c = 0; while (e.hasMoreElements()) {c++; e.nextElement();}
0483: return c;
0484: */
0485: }
0486:
0487: public long countActiveURL() {
0488: return seedActiveDB.getLongAcc(yacySeed.LCOUNT);
0489: }
0490:
0491: public long countActiveRWI() {
0492: return seedActiveDB.getLongAcc(yacySeed.ICOUNT);
0493: }
0494:
0495: public long countActivePPM() {
0496: return seedActiveDB.getLongAcc(yacySeed.ISPEED);
0497: }
0498:
0499: public double countActiveQPM() {
0500: return seedActiveDB.getDoubleAcc(yacySeed.RSPEED);
0501: }
0502:
0503: public long countPassiveURL() {
0504: return seedPassiveDB.getLongAcc(yacySeed.LCOUNT);
0505: }
0506:
0507: public long countPassiveRWI() {
0508: return seedPassiveDB.getLongAcc(yacySeed.ICOUNT);
0509: }
0510:
0511: public long countPotentialURL() {
0512: return seedPotentialDB.getLongAcc(yacySeed.LCOUNT);
0513: }
0514:
0515: public long countPotentialRWI() {
0516: return seedPotentialDB.getLongAcc(yacySeed.ICOUNT);
0517: }
0518:
0519: public synchronized void addConnected(yacySeed seed) {
0520: if ((seed == null) || (seed.isProper() != null))
0521: return;
0522: //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
0523: try {
0524: nameLookupCache.put(seed.getName(), seed);
0525: HashMap<String, String> seedPropMap = seed.getMap();
0526: synchronized (seedPropMap) {
0527: seedActiveDB.set(seed.hash, seedPropMap);
0528: }
0529: seedPassiveDB.remove(seed.hash);
0530: seedPotentialDB.remove(seed.hash);
0531: } catch (IOException e) {
0532: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0533: + e.getMessage() + "); resetting seed.db", e);
0534: resetActiveTable();
0535: } catch (kelondroException e) {
0536: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0537: + e.getMessage() + "); resetting seed.db", e);
0538: resetActiveTable();
0539: } catch (IllegalArgumentException e) {
0540: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0541: + e.getMessage() + "); resetting seed.db", e);
0542: resetActiveTable();
0543: }
0544: }
0545:
0546: public synchronized void addDisconnected(yacySeed seed) {
0547: if (seed == null)
0548: return;
0549: try {
0550: nameLookupCache.remove(seed.getName());
0551: seedActiveDB.remove(seed.hash);
0552: seedPotentialDB.remove(seed.hash);
0553: } catch (Exception e) {
0554: }
0555: //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
0556: try {
0557: HashMap<String, String> seedPropMap = seed.getMap();
0558: synchronized (seedPropMap) {
0559: seedPassiveDB.set(seed.hash, seedPropMap);
0560: }
0561: } catch (IOException e) {
0562: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0563: + e.getMessage() + "); resetting seed.db", e);
0564: resetPassiveTable();
0565: } catch (kelondroException e) {
0566: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0567: + e.getMessage() + "); resetting seed.db", e);
0568: resetPassiveTable();
0569: } catch (IllegalArgumentException e) {
0570: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0571: + e.getMessage() + "); resetting seed.db", e);
0572: resetPassiveTable();
0573: }
0574: }
0575:
0576: public synchronized void addPotential(yacySeed seed) {
0577: if (seed == null)
0578: return;
0579: try {
0580: nameLookupCache.remove(seed.getName());
0581: seedActiveDB.remove(seed.hash);
0582: seedPassiveDB.remove(seed.hash);
0583: } catch (Exception e) {
0584: }
0585: if (seed.isProper() != null)
0586: return;
0587: //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
0588: try {
0589: HashMap<String, String> seedPropMap = seed.getMap();
0590: synchronized (seedPropMap) {
0591: seedPotentialDB.set(seed.hash, seedPropMap);
0592: }
0593: } catch (IOException e) {
0594: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0595: + e.getMessage() + "); resetting seed.db", e);
0596: resetPotentialTable();
0597: } catch (kelondroException e) {
0598: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0599: + e.getMessage() + "); resetting seed.db", e);
0600: resetPotentialTable();
0601: } catch (IllegalArgumentException e) {
0602: yacyCore.log.logSevere("ERROR add: seed.db corrupt ("
0603: + e.getMessage() + "); resetting seed.db", e);
0604: resetPotentialTable();
0605: }
0606: }
0607:
0608: public synchronized void removeDisconnected(String peerHash) {
0609: if (peerHash == null)
0610: return;
0611: try {
0612: seedPassiveDB.remove(peerHash);
0613: } catch (IOException e) {
0614: }
0615: }
0616:
0617: public synchronized void removePotential(String peerHash) {
0618: if (peerHash == null)
0619: return;
0620: try {
0621: seedPotentialDB.remove(peerHash);
0622: } catch (IOException e) {
0623: }
0624: }
0625:
0626: public boolean hasConnected(String hash) {
0627: try {
0628: return (seedActiveDB.get(hash) != null);
0629: } catch (IOException e) {
0630: return false;
0631: }
0632: }
0633:
0634: public boolean hasDisconnected(String hash) {
0635: try {
0636: return (seedPassiveDB.get(hash) != null);
0637: } catch (IOException e) {
0638: return false;
0639: }
0640: }
0641:
0642: public boolean hasPotential(String hash) {
0643: try {
0644: return (seedPotentialDB.get(hash) != null);
0645: } catch (IOException e) {
0646: return false;
0647: }
0648: }
0649:
0650: private yacySeed get(String hash, kelondroMapObjects database) {
0651: if (hash == null)
0652: return null;
0653: if ((this .mySeed != null) && (hash.equals(mySeed.hash)))
0654: return mySeed;
0655: HashMap<String, String> entry = database.getMap(hash);
0656: if (entry == null)
0657: return null;
0658: return new yacySeed(hash, entry);
0659: }
0660:
0661: public yacySeed getConnected(String hash) {
0662: return get(hash, seedActiveDB);
0663: }
0664:
0665: public yacySeed getDisconnected(String hash) {
0666: return get(hash, seedPassiveDB);
0667: }
0668:
0669: public yacySeed getPotential(String hash) {
0670: return get(hash, seedPotentialDB);
0671: }
0672:
0673: public yacySeed get(String hash) {
0674: yacySeed seed = getConnected(hash);
0675: if (seed == null)
0676: seed = getDisconnected(hash);
0677: if (seed == null)
0678: seed = getPotential(hash);
0679: return seed;
0680: }
0681:
0682: public void update(String hash, yacySeed seed) {
0683: if (this .mySeed == null)
0684: initMySeed();
0685: if (hash.equals(mySeed.hash)) {
0686: mySeed = seed;
0687: return;
0688: }
0689:
0690: yacySeed s = get(hash, seedActiveDB);
0691: if (s != null)
0692: try {
0693: seedActiveDB.set(hash, seed.getMap());
0694: return;
0695: } catch (IOException e) {
0696: }
0697:
0698: s = get(hash, seedPassiveDB);
0699: if (s != null)
0700: try {
0701: seedPassiveDB.set(hash, seed.getMap());
0702: return;
0703: } catch (IOException e) {
0704: }
0705:
0706: s = get(hash, seedPotentialDB);
0707: if (s != null)
0708: try {
0709: seedPotentialDB.set(hash, seed.getMap());
0710: return;
0711: } catch (IOException e) {
0712: }
0713: }
0714:
0715: public yacySeed lookupByName(String peerName) {
0716: // reads a seed by searching by name
0717:
0718: // local peer?
0719: if (peerName.equals("localpeer")) {
0720: if (this .mySeed == null)
0721: initMySeed();
0722: return mySeed;
0723: }
0724:
0725: // then try to use the cache
0726: yacySeed seed = (yacySeed) nameLookupCache.get(peerName);
0727: if (seed != null)
0728: return seed;
0729:
0730: // enumerate the cache and simultanous insert values
0731: String name;
0732: for (int table = 0; table < 2; table++) {
0733: Iterator<yacySeed> e = (table == 0) ? seedsConnected(true,
0734: false, null, (float) 0.0) : seedsDisconnected(true,
0735: false, null, (float) 0.0);
0736: while (e.hasNext()) {
0737: seed = (yacySeed) e.next();
0738: if (seed != null) {
0739: name = seed.getName().toLowerCase();
0740: if (seed.isProper() == null)
0741: nameLookupCache.put(name, seed);
0742: if (name.equals(peerName))
0743: return seed;
0744: }
0745: }
0746: }
0747: // check local seed
0748: if (this .mySeed == null)
0749: initMySeed();
0750: name = mySeed.getName().toLowerCase();
0751: if (mySeed.isProper() == null)
0752: nameLookupCache.put(name, mySeed);
0753: if (name.equals(peerName))
0754: return mySeed;
0755: // nothing found
0756: return null;
0757: }
0758:
0759: public yacySeed lookupByIP(InetAddress peerIP,
0760: boolean lookupConnected, boolean lookupDisconnected,
0761: boolean lookupPotential) {
0762:
0763: if (peerIP == null)
0764: return null;
0765: yacySeed seed = null;
0766:
0767: // local peer?
0768: if (httpd.isThisHostIP(peerIP)) {
0769: if (this .mySeed == null)
0770: initMySeed();
0771: return mySeed;
0772: }
0773:
0774: // then try to use the cache
0775: SoftReference<yacySeed> ref = ipLookupCache.get(peerIP);
0776: if (ref != null) {
0777: seed = (yacySeed) ref.get();
0778: if (seed != null)
0779: return seed;
0780: }
0781:
0782: int pos = -1;
0783: String addressStr = null;
0784: InetAddress seedIPAddress = null;
0785: HashSet<String> badPeerHashes = new HashSet<String>();
0786:
0787: if (lookupConnected) {
0788: // enumerate the cache and simultanous insert values
0789: Iterator<yacySeed> e = seedsConnected(true, false, null,
0790: (float) 0.0);
0791: while (e.hasNext()) {
0792: try {
0793: seed = (yacySeed) e.next();
0794: if (seed != null) {
0795: addressStr = seed.getPublicAddress();
0796: if (addressStr == null) {
0797: serverLog.logWarning("YACY",
0798: "lookupByIP/Connected: address of seed "
0799: + seed.getName() + "/"
0800: + seed.hash + " is null.");
0801: badPeerHashes.add(seed.hash);
0802: continue;
0803: }
0804: if ((pos = addressStr.indexOf(":")) != -1) {
0805: addressStr = addressStr.substring(0, pos);
0806: }
0807: seedIPAddress = InetAddress
0808: .getByName(addressStr);
0809: if (seed.isProper() == null)
0810: ipLookupCache.put(seedIPAddress,
0811: new SoftReference<yacySeed>(seed));
0812: if (seedIPAddress.equals(peerIP))
0813: return seed;
0814: }
0815: } catch (UnknownHostException ex) {
0816: }
0817: }
0818: // delete bad peers
0819: Iterator<String> i = badPeerHashes.iterator();
0820: while (i.hasNext())
0821: try {
0822: seedActiveDB.remove(i.next());
0823: } catch (IOException e1) {
0824: e1.printStackTrace();
0825: }
0826: badPeerHashes.clear();
0827: }
0828:
0829: if (lookupDisconnected) {
0830: // enumerate the cache and simultanous insert values
0831: Iterator<yacySeed> e = seedsDisconnected(true, false, null,
0832: (float) 0.0);
0833:
0834: while (e.hasNext()) {
0835: try {
0836: seed = (yacySeed) e.next();
0837: if (seed != null) {
0838: addressStr = seed.getPublicAddress();
0839: if (addressStr == null) {
0840: serverLog.logWarning("YACY",
0841: "lookupByIPDisconnected: address of seed "
0842: + seed.getName() + "/"
0843: + seed.hash + " is null.");
0844: badPeerHashes.add(seed.hash);
0845: continue;
0846: }
0847: if ((pos = addressStr.indexOf(":")) != -1) {
0848: addressStr = addressStr.substring(0, pos);
0849: }
0850: seedIPAddress = InetAddress
0851: .getByName(addressStr);
0852: if (seed.isProper() == null)
0853: ipLookupCache.put(seedIPAddress,
0854: new SoftReference<yacySeed>(seed));
0855: if (seedIPAddress.equals(peerIP))
0856: return seed;
0857: }
0858: } catch (UnknownHostException ex) {
0859: }
0860: }
0861: // delete bad peers
0862: Iterator<String> i = badPeerHashes.iterator();
0863: while (i.hasNext())
0864: try {
0865: seedActiveDB.remove(i.next());
0866: } catch (IOException e1) {
0867: e1.printStackTrace();
0868: }
0869: badPeerHashes.clear();
0870: }
0871:
0872: if (lookupPotential) {
0873: // enumerate the cache and simultanous insert values
0874: Iterator<yacySeed> e = seedsPotential(true, false, null,
0875: (float) 0.0);
0876:
0877: while (e.hasNext()) {
0878: try {
0879: seed = (yacySeed) e.next();
0880: if ((seed != null)
0881: && ((addressStr = seed.getPublicAddress()) != null)) {
0882: if ((pos = addressStr.indexOf(":")) != -1) {
0883: addressStr = addressStr.substring(0, pos);
0884: }
0885: seedIPAddress = InetAddress
0886: .getByName(addressStr);
0887: if (seed.isProper() == null)
0888: ipLookupCache.put(seedIPAddress,
0889: new SoftReference<yacySeed>(seed));
0890: if (seedIPAddress.equals(peerIP))
0891: return seed;
0892: }
0893: } catch (UnknownHostException ex) {
0894: }
0895: }
0896: }
0897:
0898: try {
0899: // check local seed
0900: if (this .mySeed == null)
0901: return null;
0902: addressStr = mySeed.getPublicAddress();
0903: if (addressStr == null)
0904: return null;
0905: if ((pos = addressStr.indexOf(":")) != -1) {
0906: addressStr = addressStr.substring(0, pos);
0907: }
0908: seedIPAddress = InetAddress.getByName(addressStr);
0909: if (mySeed.isProper() == null)
0910: ipLookupCache.put(seedIPAddress,
0911: new SoftReference<yacySeed>(mySeed));
0912: if (seedIPAddress.equals(peerIP))
0913: return mySeed;
0914: // nothing found
0915: return null;
0916: } catch (UnknownHostException e2) {
0917: return null;
0918: }
0919: }
0920:
0921: public ArrayList<String> storeCache(File seedFile)
0922: throws IOException {
0923: return storeCache(seedFile, false);
0924: }
0925:
0926: private ArrayList<String> storeCache(File seedFile,
0927: boolean addMySeed) throws IOException {
0928: PrintWriter pw = null;
0929: ArrayList<String> v = new ArrayList<String>(
0930: seedActiveDB.size() + 1);
0931: try {
0932:
0933: pw = new PrintWriter(new BufferedWriter(new FileWriter(
0934: seedFile)));
0935:
0936: // store own seed
0937: String line;
0938: if (this .mySeed == null)
0939: initMySeed();
0940: if (addMySeed) {
0941: line = mySeed.genSeedStr(null);
0942: v.add(line);
0943: pw.print(line + serverCore.CRLF_STRING);
0944: }
0945:
0946: // store other seeds
0947: yacySeed ys;
0948: Iterator<yacySeed> se = seedsConnected(true, false, null,
0949: (float) 0.0);
0950: while (se.hasNext()) {
0951: ys = (yacySeed) se.next();
0952: if (ys != null) {
0953: line = ys.genSeedStr(null);
0954: v.add(line);
0955: pw.print(line + serverCore.CRLF_STRING);
0956: }
0957: }
0958: pw.flush();
0959: } finally {
0960: if (pw != null)
0961: try {
0962: pw.close();
0963: } catch (Exception e) {
0964: }
0965: }
0966: return v;
0967: }
0968:
0969: public String uploadCache(yacySeedUploader uploader,
0970: serverSwitch sb, yacySeedDB seedDB,
0971: // String seedFTPServer,
0972: // String seedFTPAccount,
0973: // String seedFTPPassword,
0974: // File seedFTPPath,
0975: yacyURL seedURL) throws Exception {
0976:
0977: // upload a seed file, if possible
0978: if (seedURL == null)
0979: throw new NullPointerException(
0980: "UPLOAD - Error: URL not given");
0981:
0982: String log = null;
0983: File seedFile = null;
0984: try {
0985: // create a seed file which for uploading ...
0986: seedFile = File.createTempFile("seedFile", ".txt",
0987: plasmaHTCache.cachePath);
0988: seedFile.deleteOnExit();
0989: serverLog.logFine("YACY",
0990: "SaveSeedList: Storing seedlist into tempfile "
0991: + seedFile.toString());
0992: ArrayList<String> uv = storeCache(seedFile, true);
0993:
0994: // uploading the seed file
0995: serverLog.logFine("YACY",
0996: "SaveSeedList: Trying to upload seed-file, "
0997: + seedFile.length() + " bytes, "
0998: + uv.size() + " entries.");
0999: log = uploader.uploadSeedFile(sb, seedDB, seedFile);
1000:
1001: // test download
1002: serverLog.logFine("YACY",
1003: "SaveSeedList: Trying to download seed-file '"
1004: + seedURL + "'.");
1005: ArrayList<String> check = downloadSeedFile(seedURL);
1006:
1007: // Comparing if local copy and uploaded copy are equal
1008: String errorMsg = checkCache(uv, check);
1009: if (errorMsg == null)
1010: log = log
1011: + "UPLOAD CHECK - Success: the result vectors are equal"
1012: + serverCore.CRLF_STRING;
1013: else {
1014: throw new Exception(
1015: "UPLOAD CHECK - Error: the result vector is different. "
1016: + errorMsg + serverCore.CRLF_STRING);
1017: }
1018: } finally {
1019: if (seedFile != null)
1020: try {
1021: seedFile.delete();
1022: } catch (Exception e) {/* ignore this */
1023: }
1024: }
1025:
1026: return log;
1027: }
1028:
1029: private ArrayList<String> downloadSeedFile(yacyURL seedURL)
1030: throws IOException {
1031: httpc remote = null;
1032: try {
1033: // init httpc
1034: remote = new httpc(seedURL.getHost(), seedURL.getHost(),
1035: seedURL.getPort(), 10000, seedURL.getProtocol()
1036: .equalsIgnoreCase("https"),
1037: sb.remoteProxyConfig, null, null);
1038:
1039: // Configure http headers
1040: httpHeader reqHeader = new httpHeader();
1041: reqHeader.put(httpHeader.PRAGMA, "no-cache");
1042: reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
1043:
1044: // send request
1045: httpc.response res = remote.GET(seedURL.getFile(),
1046: reqHeader);
1047:
1048: // check response code
1049: if (res.statusCode != 200) {
1050: throw new IOException("Server returned status: "
1051: + res.status);
1052: }
1053:
1054: // read byte array
1055: byte[] content = serverFileUtils.read(res
1056: .getContentInputStream());
1057: remote.close();
1058:
1059: // uncompress it if it is gzipped
1060: content = serverFileUtils.uncompressGZipArray(content);
1061:
1062: // convert it into an array
1063: return nxTools.strings(content, "UTF-8");
1064: } catch (Exception e) {
1065: throw new IOException("Unable to download seed file '"
1066: + seedURL + "'. " + e.getMessage());
1067: }
1068: }
1069:
1070: private String checkCache(ArrayList<String> uv,
1071: ArrayList<String> check) {
1072: if ((check == null) || (uv == null)
1073: || (uv.size() != check.size())) {
1074: serverLog.logFine("YACY",
1075: "SaveSeedList: Local and uploades seed-list "
1076: + "contains varying numbers of entries."
1077: + "\n\tLocal seed-list: "
1078: + ((uv == null) ? "null" : Integer
1079: .toString(uv.size()))
1080: + " entries"
1081: + "\n\tRemote seed-list: "
1082: + ((check == null) ? "null" : Integer
1083: .toString(check.size()))
1084: + " enties");
1085: return "Entry count is different: uv.size() = "
1086: + ((uv == null) ? "null" : Integer.toString(uv
1087: .size()))
1088: + ", check = "
1089: + ((check == null) ? "null" : Integer
1090: .toString(check.size()));
1091: }
1092:
1093: serverLog
1094: .logFine("YACY",
1095: "SaveSeedList: Comparing local and uploades seed-list entries ...");
1096: int i;
1097: for (i = 0; i < uv.size(); i++) {
1098: if (!(((String) uv.get(i)).equals((String) check.get(i))))
1099: return "Element at position " + i + " is different.";
1100: }
1101:
1102: // no difference found
1103: return null;
1104: }
1105:
1106: public String resolveYacyAddress(String host) {
1107: yacySeed seed;
1108: int p;
1109: String subdom = null;
1110: if (host.endsWith(".yacyh")) {
1111: // this is not functional at the moment
1112: // caused by lowecasing of hashes at the browser client
1113: p = host.indexOf(".");
1114: if ((p > 0) && (p != (host.length() - 6))) {
1115: subdom = host.substring(0, p);
1116: host = host.substring(p + 1);
1117: }
1118: // check if we have a b64-hash or a hex-hash
1119: String hash = host.substring(0, host.length() - 6);
1120: if (hash.length() > commonHashLength) {
1121: // this is probably a hex-hash
1122: hash = yacySeed.hexHash2b64Hash(hash);
1123: }
1124: // check remote seeds
1125: seed = getConnected(hash); // checks only remote, not local
1126: // check local seed
1127: if (seed == null) {
1128: if (this .mySeed == null)
1129: initMySeed();
1130: if (hash.equals(mySeed.hash))
1131: seed = mySeed;
1132: else
1133: return null;
1134: }
1135: return seed.getPublicAddress()
1136: + ((subdom == null) ? "" : ("/" + subdom));
1137: } else if (host.endsWith(".yacy")) {
1138: // identify subdomain
1139: p = host.indexOf(".");
1140: if ((p > 0) && (p != (host.length() - 5))) {
1141: subdom = host.substring(0, p); // no double-dot attack possible, the subdom cannot have ".." in it
1142: host = host.substring(p + 1); // if ever, the double-dots are here but do not harm
1143: }
1144: // identify domain
1145: String domain = host.substring(0, host.length() - 5)
1146: .toLowerCase();
1147: seed = lookupByName(domain);
1148: if (seed == null)
1149: return null;
1150: if (this .mySeed == null)
1151: initMySeed();
1152: if ((seed == mySeed) && (!(seed.isOnline()))) {
1153: // take local ip instead of external
1154: return serverDomains.myPublicIP()
1155: + ":"
1156: + serverCore.getPortNr(sb.getConfig("port",
1157: "8080"))
1158: + ((subdom == null) ? "" : ("/" + subdom));
1159: }
1160: return seed.getPublicAddress()
1161: + ((subdom == null) ? "" : ("/" + subdom));
1162: } else {
1163: return null;
1164: }
1165: }
1166:
1167: class seedEnum implements Iterator<yacySeed> {
1168:
1169: kelondroMapObjects.mapIterator it;
1170: yacySeed nextSeed;
1171: kelondroMapObjects database;
1172: float minVersion;
1173:
1174: public seedEnum(boolean up, boolean rot, byte[] firstKey,
1175: byte[] secondKey, kelondroMapObjects database,
1176: float minVersion) {
1177: this .database = database;
1178: this .minVersion = minVersion;
1179: try {
1180: it = (firstKey == null) ? database.maps(up, rot)
1181: : database.maps(up, rot, firstKey, secondKey);
1182: while (true) {
1183: nextSeed = internalNext();
1184: if (nextSeed == null)
1185: break;
1186: if (nextSeed.getVersion() >= this .minVersion)
1187: break;
1188: }
1189: } catch (IOException e) {
1190: e.printStackTrace();
1191: yacyCore.log.logSevere(
1192: "ERROR seedLinEnum: seed.db corrupt ("
1193: + e.getMessage()
1194: + "); resetting seed.db", e);
1195: if (database == seedActiveDB)
1196: seedActiveDB = resetSeedTable(seedActiveDB,
1197: seedActiveDBFile);
1198: if (database == seedPassiveDB)
1199: seedPassiveDB = resetSeedTable(seedPassiveDB,
1200: seedPassiveDBFile);
1201: it = null;
1202: } catch (kelondroException e) {
1203: e.printStackTrace();
1204: yacyCore.log.logSevere(
1205: "ERROR seedLinEnum: seed.db corrupt ("
1206: + e.getMessage()
1207: + "); resetting seed.db", e);
1208: if (database == seedActiveDB)
1209: seedActiveDB = resetSeedTable(seedActiveDB,
1210: seedActiveDBFile);
1211: if (database == seedPassiveDB)
1212: seedPassiveDB = resetSeedTable(seedPassiveDB,
1213: seedPassiveDBFile);
1214: it = null;
1215: }
1216: }
1217:
1218: public seedEnum(boolean up, String field,
1219: kelondroMapObjects database) {
1220: this .database = database;
1221: try {
1222: it = database.maps(up, field);
1223: nextSeed = internalNext();
1224: } catch (kelondroException e) {
1225: e.printStackTrace();
1226: yacyCore.log.logSevere(
1227: "ERROR seedLinEnum: seed.db corrupt ("
1228: + e.getMessage()
1229: + "); resetting seed.db", e);
1230: if (database == seedActiveDB)
1231: seedActiveDB = resetSeedTable(seedActiveDB,
1232: seedActiveDBFile);
1233: if (database == seedPassiveDB)
1234: seedPassiveDB = resetSeedTable(seedPassiveDB,
1235: seedPassiveDBFile);
1236: if (database == seedPotentialDB)
1237: seedPotentialDB = resetSeedTable(seedPotentialDB,
1238: seedPotentialDBFile);
1239: it = null;
1240: }
1241: }
1242:
1243: public boolean hasNext() {
1244: return (nextSeed != null);
1245: }
1246:
1247: public yacySeed internalNext() {
1248: if ((it == null) || (!(it.hasNext())))
1249: return null;
1250: HashMap<String, String> dna = it.next();
1251: if (dna == null)
1252: return null;
1253: String hash = (String) dna.remove("key");
1254: //while (hash.length() < commonHashLength) { hash = hash + "_"; }
1255: return new yacySeed(hash, dna);
1256: }
1257:
1258: public yacySeed next() {
1259: yacySeed seed = nextSeed;
1260: try {
1261: while (true) {
1262: nextSeed = internalNext();
1263: if (nextSeed == null)
1264: break;
1265: if (nextSeed.getVersion() >= this .minVersion)
1266: break;
1267: }
1268: } catch (kelondroException e) {
1269: e.printStackTrace();
1270: // eergency reset
1271: yacyCore.log.logSevere("seed-db emergency reset", e);
1272: try {
1273: database.reset();
1274: nextSeed = null;
1275: return null;
1276: } catch (IOException e1) {
1277: // no recovery possible
1278: e1.printStackTrace();
1279: System.exit(-1);
1280: }
1281: }
1282: return seed;
1283: }
1284:
1285: public void remove() {
1286: throw new UnsupportedOperationException();
1287: }
1288:
1289: }
1290:
1291: }
|