mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
some code-cleanup and possible speed enhancements in different core methods
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4935 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
6a9cc29cdd
commit
b928ae492a
|
@ -230,12 +230,7 @@ public class Balancer {
|
|||
}
|
||||
|
||||
public synchronized boolean has(String urlhash) {
|
||||
try {
|
||||
return urlFileIndex.has(urlhash.getBytes());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return false;
|
||||
}
|
||||
return urlFileIndex.has(urlhash.getBytes());
|
||||
}
|
||||
|
||||
public boolean notEmpty() {
|
||||
|
@ -345,7 +340,7 @@ public class Balancer {
|
|||
urlFileIndex.put(entry.toRow());
|
||||
|
||||
// check size of domainStacks and flush
|
||||
if ((domainStacks.size() > 20) || (sizeDomainStacks() > 1000)) {
|
||||
if ((domainStacks.size() > 100) || (sizeDomainStacks() > 1000)) {
|
||||
flushOnceDomStacks(1, urlRAMStack.size() < 100); // when the ram stack is small, flush it there
|
||||
}
|
||||
}
|
||||
|
|
|
@ -238,6 +238,11 @@ public final class CrawlStacker extends Thread {
|
|||
int currentdepth,
|
||||
CrawlProfile.entry profile) {
|
||||
if (profile == null) return;
|
||||
|
||||
// check first before we create a big object
|
||||
if (this.urlEntryCache.has(nexturl.hash().getBytes())) return;
|
||||
|
||||
// now create the big object before we enter the synchronized block
|
||||
CrawlEntry newEntry = new CrawlEntry(
|
||||
initiatorHash,
|
||||
nexturl,
|
||||
|
@ -249,15 +254,15 @@ public final class CrawlStacker extends Thread {
|
|||
0,
|
||||
0
|
||||
);
|
||||
|
||||
if (newEntry == null) return;
|
||||
kelondroRow.Entry newEntryRow = newEntry.toRow();
|
||||
|
||||
synchronized(this.urlEntryHashCache) {
|
||||
synchronized(this.urlEntryHashCache) {
|
||||
kelondroRow.Entry oldValue;
|
||||
boolean hostknown = true;
|
||||
if (prequeue) hostknown = prefetchHost(nexturl.getHost());
|
||||
try {
|
||||
oldValue = this.urlEntryCache.put(newEntry.toRow());
|
||||
oldValue = this.urlEntryCache.put(newEntryRow);
|
||||
} catch (IOException e) {
|
||||
oldValue = null;
|
||||
}
|
||||
|
@ -346,7 +351,7 @@ public final class CrawlStacker extends Thread {
|
|||
synchronized (this.urlEntryHashCache) {
|
||||
urlHash = this.urlEntryHashCache.removeFirst();
|
||||
if (urlHash == null) throw new IOException("urlHash is null");
|
||||
entry = this.urlEntryCache.remove(urlHash.getBytes(), false);
|
||||
entry = this.urlEntryCache.remove(urlHash.getBytes(), true);
|
||||
}
|
||||
|
||||
if ((urlHash == null) || (entry == null)) return null;
|
||||
|
|
|
@ -90,15 +90,6 @@ public final class ResultURLs {
|
|||
assert executorHash != null;
|
||||
if (e == null) { return; }
|
||||
try {
|
||||
// switch (stackType) {
|
||||
// case 0: break;
|
||||
// case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break;
|
||||
// case 2: searchResultStack.add(e.hash() + initiatorHash + executorHash); break;
|
||||
// case 3: transfResultStack.add(e.hash() + initiatorHash + executorHash); break;
|
||||
// case 4: proxyResultStack.add(e.hash() + initiatorHash + executorHash); break;
|
||||
// case 5: lcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
|
||||
// case 6: gcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
|
||||
// }
|
||||
final List<String> resultStack = getStack(stackType);
|
||||
if(resultStack != null) {
|
||||
resultStack.add(e.hash() + initiatorHash + executorHash);
|
||||
|
@ -121,54 +112,18 @@ public final class ResultURLs {
|
|||
} else {
|
||||
return -1;
|
||||
}
|
||||
// switch (stack) {
|
||||
// case 1: return externResultStack.size();
|
||||
// case 2: return searchResultStack.size();
|
||||
// case 3: return transfResultStack.size();
|
||||
// case 4: return proxyResultStack.size();
|
||||
// case 5: return lcrawlResultStack.size();
|
||||
// case 6: return gcrawlResultStack.size();
|
||||
// }
|
||||
// return -1;
|
||||
}
|
||||
|
||||
public synchronized String getUrlHash(int stack, int pos) {
|
||||
return getHashNo(stack, pos, 0);
|
||||
// switch (stack) {
|
||||
// case 1: return (externResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
|
||||
// case 2: return (searchResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
|
||||
// case 3: return (transfResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
|
||||
// case 4: return (proxyResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
|
||||
// case 5: return (lcrawlResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
|
||||
// case 6: return (gcrawlResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
|
||||
// }
|
||||
// return null;
|
||||
}
|
||||
|
||||
public synchronized String getInitiatorHash(int stack, int pos) {
|
||||
return getHashNo(stack, pos, 1);
|
||||
// switch (stack) {
|
||||
// case 1: return (externResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
|
||||
// case 2: return (searchResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
|
||||
// case 3: return (transfResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
|
||||
// case 4: return (proxyResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
|
||||
// case 5: return (lcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
|
||||
// case 6: return (gcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
|
||||
// }
|
||||
// return null;
|
||||
}
|
||||
|
||||
public synchronized String getExecutorHash(final int stack, int pos) {
|
||||
return getHashNo(stack, pos, 2);
|
||||
// switch (stack) {
|
||||
// case 1: return (externResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
|
||||
// case 2: return (searchResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
|
||||
// case 3: return (transfResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
|
||||
// case 4: return (proxyResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
|
||||
// case 5: return (lcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
|
||||
// case 6: return (gcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
|
||||
// }
|
||||
// return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -221,6 +176,7 @@ public final class ResultURLs {
|
|||
|
||||
final List<String> resultStack = getStack(stack);
|
||||
if(resultStack != null) {
|
||||
assert pos < resultStack.size() : "pos = " + pos + ", resultStack.size() = " + resultStack.size();
|
||||
if(pos < resultStack.size()) {
|
||||
return resultStack.get(pos);
|
||||
} else {
|
||||
|
|
|
@ -141,11 +141,7 @@ public class ZURL {
|
|||
}
|
||||
|
||||
public boolean exists(String urlHash) {
|
||||
try {
|
||||
return urlIndex.has(urlHash.getBytes());
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
return urlIndex.has(urlHash.getBytes());
|
||||
}
|
||||
|
||||
public void clearStack() {
|
||||
|
|
|
@ -28,6 +28,7 @@ package de.anomic.index;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
import de.anomic.kelondro.kelondroCloneableIterator;
|
||||
|
@ -159,7 +160,7 @@ public final class indexRAMRI implements indexRI, indexRIReader {
|
|||
return null;
|
||||
}
|
||||
|
||||
public synchronized String bestFlushWordHash() {
|
||||
private String bestFlushWordHash() {
|
||||
// select appropriate hash
|
||||
// we have 2 different methods to find a good hash:
|
||||
// - the oldest entry in the cache
|
||||
|
@ -189,7 +190,8 @@ public final class indexRAMRI implements indexRI, indexRIReader {
|
|||
hash = hashDate.getMinObject(); // flush oldest entries
|
||||
}
|
||||
if (hash == null) {
|
||||
heap.wordContainers(null, false).next();
|
||||
indexContainer ic = heap.wordContainers(null, false).next();
|
||||
if (ic != null) hash = ic.getWordHash();
|
||||
}
|
||||
return hash;
|
||||
} catch (Exception e) {
|
||||
|
@ -198,6 +200,23 @@ public final class indexRAMRI implements indexRI, indexRIReader {
|
|||
return null;
|
||||
}
|
||||
|
||||
public synchronized ArrayList<indexContainer> bestFlushContainers(int count) {
|
||||
ArrayList<indexContainer> containerList = new ArrayList<indexContainer>();
|
||||
String hash;
|
||||
indexContainer container;
|
||||
for (int i = 0; i < count; i++) {
|
||||
hash = bestFlushWordHash();
|
||||
if (hash == null) return containerList;
|
||||
container = heap.delete(hash);
|
||||
assert (container != null);
|
||||
if (container == null) return containerList;
|
||||
hashScore.deleteScore(hash);
|
||||
hashDate.deleteScore(hash);
|
||||
containerList.add(container);
|
||||
}
|
||||
return containerList;
|
||||
}
|
||||
|
||||
private int intTime(long longTime) {
|
||||
return (int) Math.max(0, ((longTime - initTime) / 1000));
|
||||
}
|
||||
|
|
|
@ -151,11 +151,7 @@ public final class indexRepositoryReference {
|
|||
|
||||
public synchronized boolean exists(String urlHash) {
|
||||
if (urlIndexFile == null) return false; // case may happen during shutdown
|
||||
try {
|
||||
return urlIndexFile.has(urlHash.getBytes());
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
return urlIndexFile.has(urlHash.getBytes());
|
||||
}
|
||||
|
||||
public kelondroCloneableIterator<indexURLReference> entries(boolean up, String firstHash) throws IOException {
|
||||
|
|
|
@ -49,6 +49,11 @@ public class kelondroBytesIntMap {
|
|||
return index.row();
|
||||
}
|
||||
|
||||
public synchronized boolean has(byte[] key) {
|
||||
assert (key != null);
|
||||
return index.has(key);
|
||||
}
|
||||
|
||||
public synchronized int geti(byte[] key) throws IOException {
|
||||
assert (key != null);
|
||||
kelondroRow.Entry indexentry = index.get(key);
|
||||
|
|
|
@ -191,7 +191,7 @@ public class kelondroCache implements kelondroIndex {
|
|||
readMissCache = null;
|
||||
}
|
||||
|
||||
public boolean has(byte[] key) throws IOException {
|
||||
public boolean has(byte[] key) {
|
||||
// first look into the miss cache
|
||||
if (readMissCache != null) {
|
||||
if (readMissCache.get(key) != null) {
|
||||
|
|
|
@ -345,10 +345,15 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
return rowdef.newEntry(b);
|
||||
}
|
||||
|
||||
public synchronized boolean has(byte[] key) throws IOException {
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
public synchronized boolean has(byte[] key) {
|
||||
try {
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
} catch (IOException e) {
|
||||
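// the size comparison is only a consistency assertion; an I/O failure while reading the file size is reported and the lookup continues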
|
||||
e.printStackTrace();
|
||||
}
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
return index.geti(key) >= 0;
|
||||
return index.has(key);
|
||||
}
|
||||
|
||||
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
|
||||
|
|
|
@ -147,12 +147,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
|||
return RAMIndex;
|
||||
}
|
||||
|
||||
public synchronized boolean has(byte[] key) throws IOException {
|
||||
public synchronized boolean has(byte[] key) {
|
||||
// it is not recommended to implement or use a has predicate unless
|
||||
// it can be ensured that it causes no IO
|
||||
if ((kelondroAbstractRecords.debugmode) && (RAMIndex != true)) serverLog.logWarning("kelondroFlexTable", "RAM index warning in file " + super.tablename);
|
||||
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
|
||||
return index.geti(key) >= 0;
|
||||
return index.has(key);
|
||||
}
|
||||
|
||||
private kelondroBytesIntMap initializeRamIndex(int initialSpace) {
|
||||
|
|
|
@ -61,7 +61,7 @@ public interface kelondroIndex {
|
|||
public int size();
|
||||
public kelondroProfile profile();
|
||||
public kelondroRow row();
|
||||
public boolean has(byte[] key) throws IOException; // use this only if there is no get in case that has returns true
|
||||
public boolean has(byte[] key); // use this only if there is no get in case that has returns true
|
||||
public kelondroRow.Entry get(byte[] key) throws IOException;
|
||||
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
|
||||
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;
|
||||
|
|
|
@ -109,7 +109,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
|
|||
}
|
||||
|
||||
public synchronized boolean has(byte[] key) {
|
||||
return (get(key) != null);
|
||||
long handle = profile.startRead();
|
||||
int index = find(key, 0, key.length);
|
||||
profile.stopRead(handle);
|
||||
return index >= 0;
|
||||
}
|
||||
|
||||
public synchronized kelondroRow.Entry get(byte[] key) {
|
||||
|
|
|
@ -135,8 +135,12 @@ public class kelondroSQLTable implements kelondroIndex {
|
|||
return this.rowdef;
|
||||
}
|
||||
|
||||
public boolean has(byte[] key) throws IOException {
|
||||
return (get(key) != null);
|
||||
public boolean has(byte[] key) {
|
||||
try {
|
||||
return (get(key) != null);
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<kelondroRowCollection> removeDoubles() {
|
||||
|
|
|
@ -139,7 +139,7 @@ public class kelondroSplitTable implements kelondroIndex {
|
|||
if (f.isDirectory()) {
|
||||
// this is a kelondroFlex table
|
||||
serverLog.logInfo("kelondroSplitTable", "opening partial flex table " + path);
|
||||
table = new kelondroCache(new kelondroFlexTable(path, maxf, rowdef, 0, resetOnFail));
|
||||
table = new kelondroFlexTable(path, maxf, rowdef, 0, resetOnFail);
|
||||
} else {
|
||||
serverLog.logInfo("kelondroSplitTable", "opening partial eco table " + f);
|
||||
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
|
||||
|
@ -209,7 +209,7 @@ public class kelondroSplitTable implements kelondroIndex {
|
|||
return this.rowdef;
|
||||
}
|
||||
|
||||
public boolean has(byte[] key) throws IOException {
|
||||
public boolean has(byte[] key) {
|
||||
return keeperOf(key) != null;
|
||||
}
|
||||
|
||||
|
@ -276,19 +276,13 @@ public class kelondroSplitTable implements kelondroIndex {
|
|||
try {
|
||||
cs.submit(new Callable<kelondroIndex>() {
|
||||
public kelondroIndex call() {
|
||||
try {
|
||||
if (table.has(key)) return table; else return dummyIndex;
|
||||
} catch (IOException e) {
|
||||
return dummyIndex;
|
||||
}
|
||||
if (table.has(key)) return table; else return dummyIndex;
|
||||
}
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
// the executor is either shutting down or the blocking queue is full
|
||||
// execute the search directly here without concurrency
|
||||
try {
|
||||
if (table.has(key)) return table;
|
||||
} catch (IOException ee) {}
|
||||
if (table.has(key)) return table;
|
||||
rejected++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -170,7 +170,7 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
|
|||
n.commit();
|
||||
}
|
||||
|
||||
public boolean has(byte[] key) throws IOException {
|
||||
public boolean has(byte[] key) {
|
||||
throw new UnsupportedOperationException("has should not be used with kelondroTree.");
|
||||
}
|
||||
|
||||
|
|
|
@ -358,7 +358,7 @@ public final class plasmaCondenser {
|
|||
k = it.next();
|
||||
wsp = words.get(k);
|
||||
wsp.check(idx);
|
||||
words.put(k, wsp);
|
||||
words.put(k, wsp); // is that necessary?
|
||||
}
|
||||
}
|
||||
sentence = new StringBuffer(100);
|
||||
|
|
|
@ -502,19 +502,8 @@ public final class plasmaWordIndex implements indexRI {
|
|||
}
|
||||
}
|
||||
count = count - containerList.size();
|
||||
for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
|
||||
synchronized (ram) {
|
||||
if (ram.size() == 0) break;
|
||||
if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
|
||||
|
||||
// select one word to flush
|
||||
wordHash = ram.bestFlushWordHash();
|
||||
|
||||
// move one container from ram to flush list
|
||||
if (wordHash == null) c = null; else c = ram.deleteContainer(wordHash);
|
||||
}
|
||||
if (c != null) containerList.add(c);
|
||||
}
|
||||
containerList.addAll(ram.bestFlushContainers(count));
|
||||
|
||||
// flush the containers
|
||||
for (indexContainer container : containerList) collections.addEntries(container);
|
||||
//System.out.println("DEBUG-Finished flush of " + count + " entries from RAM to DB in " + (System.currentTimeMillis() - start) + " milliseconds");
|
||||
|
|
Loading…
Reference in New Issue
Block a user