mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- fixed a null pointer exception bug
- switched off more write caches
- re-enabled index-abstracts search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2885 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
194d42b6a7
commit
bd4f43cd66
|
@ -54,7 +54,7 @@ globalheader();
|
|||
|
||||
<p><b>Latest Release:</b>
|
||||
The latest YaCy release version is 0.48<br>
|
||||
Nightly builds from compiles out of SVN can be obtained from <a href="http://latest.yacy-forum.de/">http://latest.yacy-forum.de/</a>.<br>
|
||||
Nightly builds compiled from SVN can be obtained from <a href="http://latest.yacy-forum.net">http://latest.yacy-forum.net</a>.<br>
|
||||
|
||||
<ul>
|
||||
<li>Generic release of YaCy (all platforms with J2SE 1.4.2: Linux, Mac OS X, Windows, Solaris):</li>
|
||||
|
|
|
@ -56,7 +56,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
|||
if (indexfile.exists()) {
|
||||
// use existing index file
|
||||
System.out.println("*** Using File index " + indexfile);
|
||||
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true);
|
||||
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, false);
|
||||
RAMIndex = false;
|
||||
} else if ((preloadTime >= 0) && (stt > preloadTime)) {
|
||||
// generate new index file
|
||||
|
@ -127,21 +127,25 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
|||
}
|
||||
|
||||
private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException {
|
||||
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true);
|
||||
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, false);
|
||||
Iterator content = super.col[0].contentNodes(-1);
|
||||
kelondroRecords.Node node;
|
||||
kelondroRow.Entry indexentry;
|
||||
int i;
|
||||
int i, c = 0, all = super.col[0].size();
|
||||
long start = System.currentTimeMillis();
|
||||
long last = start;
|
||||
while (content.hasNext()) {
|
||||
node = (kelondroRecords.Node) content.next();
|
||||
i = node.handle().hashCode();
|
||||
indexentry = treeindex.row().newEntry();
|
||||
indexentry.setCol(0, node.getValueRow());
|
||||
indexentry.setCol(1, i);
|
||||
treeindex.put(indexentry);
|
||||
if ((i % 10000) == 0) {
|
||||
System.out.print('.');
|
||||
treeindex.addUnique(indexentry);
|
||||
c++;
|
||||
if (System.currentTimeMillis() - last > 30000) {
|
||||
System.out.println(".. generated " + c+ " entries, " + ((System.currentTimeMillis() - start) / c * (all - c) / 60000) + " minutes remaining");
|
||||
System.out.flush();
|
||||
last = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
return treeindex;
|
||||
|
|
|
@ -44,6 +44,8 @@ public class URL {
|
|||
|
||||
public void parseURLString(String url) throws MalformedURLException {
|
||||
// identify protocol
|
||||
assert (url != null);
|
||||
url = url.trim();
|
||||
int p = url.indexOf(':');
|
||||
if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'");
|
||||
this.protocol = url.substring(0, p).toLowerCase().trim();
|
||||
|
@ -104,6 +106,7 @@ public class URL {
|
|||
|
||||
public URL(URL baseURL, String relPath) throws MalformedURLException {
|
||||
if (baseURL == null) throw new MalformedURLException("base URL is null");
|
||||
if (relPath == null) throw new MalformedURLException("relPath is null");
|
||||
int p = relPath.indexOf(':');
|
||||
String relprotocol = (p < 0) ? null : relPath.substring(0, p).toLowerCase();
|
||||
if (relprotocol != null) {
|
||||
|
|
|
@ -117,7 +117,7 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
|
|||
public plasmaCrawlLURLOldEntry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException {
|
||||
try {
|
||||
this.urlHash = entry.getColString(0, null);
|
||||
this.url = new URL(entry.getColString(1, "UTF-8").trim());
|
||||
this.url = new URL(entry.getColString(1, "UTF-8"));
|
||||
this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim();
|
||||
this.moddate = new Date(86400000 * entry.getColLong(3));
|
||||
this.loaddate = new Date(86400000 * entry.getColLong(4));
|
||||
|
|
|
@ -93,7 +93,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
this.snippetCache = snippetCache;
|
||||
this.rcContainers = new indexContainer(null);
|
||||
this.rcContainerFlushCount = 0;
|
||||
this.rcAbstracts = new TreeMap();
|
||||
this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
|
||||
this.profileLocal = localTiming;
|
||||
this.profileGlobal = remoteTiming;
|
||||
this.postsort = postsort;
|
||||
|
@ -134,7 +134,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
// do a global search
|
||||
// the result of the fetch is then in the rcGlobal
|
||||
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
|
||||
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 2;
|
||||
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
|
||||
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
|
||||
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), "",
|
||||
query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts,
|
||||
|
@ -144,7 +144,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
Map searchContainerMap = localSearchContainers(null);
|
||||
|
||||
// use the search containers to fill up rcAbstracts locally
|
||||
if (searchContainerMap != null) {
|
||||
/*
|
||||
if ((rcAbstracts != null) && (searchContainerMap != null)) {
|
||||
Iterator i, ci = searchContainerMap.entrySet().iterator();
|
||||
Map.Entry entry;
|
||||
String wordhash;
|
||||
|
@ -165,20 +166,19 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// try to pre-fetch some LURLs if there is enough time
|
||||
indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values());
|
||||
prefetchLocal(rcLocal, secondaryTimeout);
|
||||
|
||||
// evaluate index abstracts and start a secondary search
|
||||
// this is temporary debugging code to learn that the index abstracts are fetched correctly
|
||||
/*
|
||||
while (System.currentTimeMillis() < secondaryTimeout + 10000) {
|
||||
while (System.currentTimeMillis() < secondaryTimeout) {
|
||||
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
|
||||
try {Thread.sleep(100);} catch (InterruptedException e) {}
|
||||
}
|
||||
if (query.size() > 1) prepareSecondarySearch();
|
||||
*/
|
||||
// evaluate index abstracts and start a secondary search
|
||||
if (rcAbstracts != null) prepareSecondarySearch();
|
||||
|
||||
// catch up global results:
|
||||
// wait until primary timeout passed
|
||||
|
@ -187,6 +187,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
((secondarySearchThreads == null) || (yacySearch.remainingWaiting(secondarySearchThreads) == 0))) break; // all threads have finished
|
||||
try {Thread.sleep(100);} catch (InterruptedException e) {}
|
||||
}
|
||||
|
||||
int globalContributions = rcContainers.size();
|
||||
|
||||
// finished searching
|
||||
|
@ -243,19 +244,22 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
String url, urls, peer, peers;
|
||||
String mypeerhash = yacyCore.seedDB.mySeed.hash;
|
||||
boolean mypeerinvolved = false;
|
||||
int mypeercount;
|
||||
while (i1.hasNext()) {
|
||||
entry1 = (Map.Entry) i1.next();
|
||||
url = (String) entry1.getKey();
|
||||
peers = (String) entry1.getValue();
|
||||
System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
|
||||
mypeercount = 0;
|
||||
for (int j = 0; j < peers.length(); j = j + 12) {
|
||||
peer = peers.substring(j, j + 12);
|
||||
if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
|
||||
if ((peer.equals(mypeerhash)) && (mypeercount++ > 1)) continue;
|
||||
//if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
|
||||
urls = (String) secondarySearchURLs.get(peer);
|
||||
urls = (urls == null) ? url : urls + url;
|
||||
secondarySearchURLs.put(peer, urls);
|
||||
if (peer.equals(mypeerhash)) mypeerinvolved = true;
|
||||
}
|
||||
if (mypeercount == 1) mypeerinvolved = true;
|
||||
}
|
||||
|
||||
// compute words for secondary search and start the secondary searches
|
||||
|
@ -269,8 +273,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
if (peer.equals(mypeerhash)) continue; // we dont need to ask ourself
|
||||
urls = (String) entry1.getValue();
|
||||
words = wordsFromPeer(peer, urls);
|
||||
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " has urls: " + urls);
|
||||
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " from words: " + words);
|
||||
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
|
||||
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
|
||||
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
|
||||
words, urls, urlStore, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
|
||||
profileGlobal, ranking);
|
||||
|
|
|
@ -433,7 +433,8 @@ public final class yacyClient {
|
|||
obj.put("maxdist", maxDistance);
|
||||
obj.put("rankingProfile", rankingProfile.toExternalString());
|
||||
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
|
||||
|
||||
if (abstractCache != null) obj.put("abstracts", "auto");
|
||||
|
||||
//yacyCore.log.logDebug("yacyClient.search url=" + url);
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
|
||||
|
@ -553,26 +554,27 @@ public final class yacyClient {
|
|||
for (int m = 0; m < words; m++) { containerCache.add(container[m], -1); }
|
||||
|
||||
// read index abstract
|
||||
Iterator i = result.entrySet().iterator();
|
||||
Map.Entry entry;
|
||||
TreeMap singleAbstract;
|
||||
String wordhash;
|
||||
serverByteBuffer ci;
|
||||
while (i.hasNext()) {
|
||||
entry = (Map.Entry) i.next();
|
||||
if (((String) entry.getKey()).startsWith("indexabstract.")) {
|
||||
wordhash = ((String) entry.getKey()).substring(14);
|
||||
synchronized (abstractCache) {
|
||||
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
|
||||
if (singleAbstract == null) singleAbstract = new TreeMap();
|
||||
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
|
||||
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
|
||||
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
|
||||
abstractCache.put(wordhash, singleAbstract);
|
||||
if (abstractCache != null) {
|
||||
Iterator i = result.entrySet().iterator();
|
||||
Map.Entry entry;
|
||||
TreeMap singleAbstract;
|
||||
String wordhash;
|
||||
serverByteBuffer ci;
|
||||
while (i.hasNext()) {
|
||||
entry = (Map.Entry) i.next();
|
||||
if (((String) entry.getKey()).startsWith("indexabstract.")) {
|
||||
wordhash = ((String) entry.getKey()).substring(14);
|
||||
synchronized (abstractCache) {
|
||||
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
|
||||
if (singleAbstract == null) singleAbstract = new TreeMap();
|
||||
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
|
||||
System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
|
||||
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
|
||||
abstractCache.put(wordhash, singleAbstract);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// generate statistics
|
||||
long searchtime;
|
||||
try {
|
||||
|
|
|
@ -237,6 +237,7 @@ public class yacySearch extends Thread {
|
|||
if (searchThreads == null) return 0;
|
||||
int alive = 0;
|
||||
for (int i = 0; i < searchThreads.length; i++) {
|
||||
if (searchThreads == null) break; // may occur
|
||||
if (searchThreads[i].isAlive()) alive++;
|
||||
}
|
||||
return alive;
|
||||
|
|
Loading…
Reference in New Issue
Block a user