some code-cleanup and possible speed enhancements in different core methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4935 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-06-17 23:56:39 +00:00
parent 6a9cc29cdd
commit b928ae492a
17 changed files with 70 additions and 103 deletions

View File

@ -230,12 +230,7 @@ public class Balancer {
}
public synchronized boolean has(String urlhash) {
try {
return urlFileIndex.has(urlhash.getBytes());
} catch (IOException e) {
e.printStackTrace();
return false;
}
return urlFileIndex.has(urlhash.getBytes());
}
public boolean notEmpty() {
@ -345,7 +340,7 @@ public class Balancer {
urlFileIndex.put(entry.toRow());
// check size of domainStacks and flush
if ((domainStacks.size() > 20) || (sizeDomainStacks() > 1000)) {
if ((domainStacks.size() > 100) || (sizeDomainStacks() > 1000)) {
flushOnceDomStacks(1, urlRAMStack.size() < 100); // when the ram stack is small, flush it there
}
}

View File

@ -238,6 +238,11 @@ public final class CrawlStacker extends Thread {
int currentdepth,
CrawlProfile.entry profile) {
if (profile == null) return;
// check first before we create a big object
if (this.urlEntryCache.has(nexturl.hash().getBytes())) return;
// now create the big object before we enter the synchronized block
CrawlEntry newEntry = new CrawlEntry(
initiatorHash,
nexturl,
@ -249,15 +254,15 @@ public final class CrawlStacker extends Thread {
0,
0
);
if (newEntry == null) return;
kelondroRow.Entry newEntryRow = newEntry.toRow();
synchronized(this.urlEntryHashCache) {
kelondroRow.Entry oldValue;
boolean hostknown = true;
if (prequeue) hostknown = prefetchHost(nexturl.getHost());
try {
oldValue = this.urlEntryCache.put(newEntry.toRow());
oldValue = this.urlEntryCache.put(newEntryRow);
} catch (IOException e) {
oldValue = null;
}
@ -346,7 +351,7 @@ public final class CrawlStacker extends Thread {
synchronized (this.urlEntryHashCache) {
urlHash = this.urlEntryHashCache.removeFirst();
if (urlHash == null) throw new IOException("urlHash is null");
entry = this.urlEntryCache.remove(urlHash.getBytes(), false);
entry = this.urlEntryCache.remove(urlHash.getBytes(), true);
}
if ((urlHash == null) || (entry == null)) return null;

View File

@ -90,15 +90,6 @@ public final class ResultURLs {
assert executorHash != null;
if (e == null) { return; }
try {
// switch (stackType) {
// case 0: break;
// case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 2: searchResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 3: transfResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 4: proxyResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 5: lcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 6: gcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
// }
final List<String> resultStack = getStack(stackType);
if(resultStack != null) {
resultStack.add(e.hash() + initiatorHash + executorHash);
@ -121,54 +112,18 @@ public final class ResultURLs {
} else {
return -1;
}
// switch (stack) {
// case 1: return externResultStack.size();
// case 2: return searchResultStack.size();
// case 3: return transfResultStack.size();
// case 4: return proxyResultStack.size();
// case 5: return lcrawlResultStack.size();
// case 6: return gcrawlResultStack.size();
// }
// return -1;
}
/**
 * Returns the first hash component of the entry at position {@code pos}
 * of the result stack selected by {@code stack}.
 * The lookup is delegated to {@code getHashNo} with component index 0.
 * NOTE(review): entries are stored as url hash + initiator hash + executor hash,
 * so component 0 is presumably the URL hash - confirm against getHashNo.
 *
 * @param stack number of the result stack to read from
 * @param pos   position of the entry inside that stack
 * @return whatever {@code getHashNo(stack, pos, 0)} yields
 */
public synchronized String getUrlHash(int stack, int pos) {
    final int urlHashComponent = 0;
    return getHashNo(stack, pos, urlHashComponent);
}
/**
 * Returns the second hash component of the entry at position {@code pos}
 * of the result stack selected by {@code stack}.
 * The lookup is delegated to {@code getHashNo} with component index 1.
 * NOTE(review): entries are stored as url hash + initiator hash + executor hash,
 * so component 1 is presumably the initiator hash - confirm against getHashNo.
 *
 * @param stack number of the result stack to read from
 * @param pos   position of the entry inside that stack
 * @return whatever {@code getHashNo(stack, pos, 1)} yields
 */
public synchronized String getInitiatorHash(int stack, int pos) {
    final int initiatorHashComponent = 1;
    return getHashNo(stack, pos, initiatorHashComponent);
}
/**
 * Returns the third hash component of the entry at position {@code pos}
 * of the result stack selected by {@code stack}.
 * The lookup is delegated to {@code getHashNo} with component index 2.
 * NOTE(review): entries are stored as url hash + initiator hash + executor hash,
 * so component 2 is presumably the executor hash - confirm against getHashNo.
 *
 * @param stack number of the result stack to read from
 * @param pos   position of the entry inside that stack
 * @return whatever {@code getHashNo(stack, pos, 2)} yields
 */
public synchronized String getExecutorHash(final int stack, int pos) {
    final int executorHashComponent = 2;
    return getHashNo(stack, pos, executorHashComponent);
}
/**
@ -221,6 +176,7 @@ public final class ResultURLs {
final List<String> resultStack = getStack(stack);
if(resultStack != null) {
assert pos < resultStack.size() : "pos = " + pos + ", resultStack.size() = " + resultStack.size();
if(pos < resultStack.size()) {
return resultStack.get(pos);
} else {

View File

@ -141,11 +141,7 @@ public class ZURL {
}
public boolean exists(String urlHash) {
try {
return urlIndex.has(urlHash.getBytes());
} catch (IOException e) {
return false;
}
return urlIndex.has(urlHash.getBytes());
}
public void clearStack() {

View File

@ -28,6 +28,7 @@ package de.anomic.index;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;
import de.anomic.kelondro.kelondroCloneableIterator;
@ -159,7 +160,7 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return null;
}
public synchronized String bestFlushWordHash() {
private String bestFlushWordHash() {
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
@ -189,7 +190,8 @@ public final class indexRAMRI implements indexRI, indexRIReader {
hash = hashDate.getMinObject(); // flush oldest entries
}
if (hash == null) {
heap.wordContainers(null, false).next();
indexContainer ic = heap.wordContainers(null, false).next();
if (ic != null) hash = ic.getWordHash();
}
return hash;
} catch (Exception e) {
@ -198,6 +200,23 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return null;
}
/**
 * Removes up to {@code count} containers from the heap and hands them back to the caller.
 * Every round asks {@code bestFlushWordHash()} for a word hash, deletes the matching
 * container from {@code heap} and removes the hash from {@code hashScore} and {@code hashDate}.
 * Collection ends early as soon as no hash is offered, or the heap has no container
 * for the offered hash.
 *
 * @param count upper bound for the number of containers to remove
 * @return the removed containers in selection order; may hold fewer than
 *         {@code count} elements (also none), but is never null
 */
public synchronized ArrayList<indexContainer> bestFlushContainers(int count) {
    final ArrayList<indexContainer> flushed = new ArrayList<indexContainer>();
    for (int round = 0; round < count; round++) {
        final String wordHash = bestFlushWordHash();
        if (wordHash == null) break; // nothing left to select

        final indexContainer removed = heap.delete(wordHash);
        assert (removed != null);
        if (removed == null) break; // selected hash had no container in the heap

        // the container left the heap, so drop its bookkeeping entries as well
        hashScore.deleteScore(wordHash);
        hashDate.deleteScore(wordHash);
        flushed.add(removed);
    }
    return flushed;
}
/**
 * Converts an absolute time value into whole thousands elapsed since {@code initTime},
 * clamped so that the result is never negative.
 * NOTE(review): the division by 1000 suggests milliseconds in, seconds out - confirm.
 *
 * @param longTime the absolute time value to convert
 * @return {@code (longTime - initTime) / 1000}, at least 0, narrowed to int
 */
private int intTime(long longTime) {
    final long elapsed = (longTime - initTime) / 1000;
    return (int) Math.max(0, elapsed);
}

View File

@ -151,11 +151,7 @@ public final class indexRepositoryReference {
public synchronized boolean exists(String urlHash) {
if (urlIndexFile == null) return false; // case may happen during shutdown
try {
return urlIndexFile.has(urlHash.getBytes());
} catch (IOException e) {
return false;
}
return urlIndexFile.has(urlHash.getBytes());
}
public kelondroCloneableIterator<indexURLReference> entries(boolean up, String firstHash) throws IOException {

View File

@ -49,6 +49,11 @@ public class kelondroBytesIntMap {
return index.row();
}
/**
 * Tells whether the underlying index knows the given key.
 * The answer is taken directly from {@code index.has(key)}.
 *
 * @param key the key to look for; a null key is rejected by assertion only
 * @return the result of {@code index.has(key)}
 */
public synchronized boolean has(byte[] key) {
    assert key != null;
    final boolean known = index.has(key);
    return known;
}
public synchronized int geti(byte[] key) throws IOException {
assert (key != null);
kelondroRow.Entry indexentry = index.get(key);

View File

@ -191,7 +191,7 @@ public class kelondroCache implements kelondroIndex {
readMissCache = null;
}
public boolean has(byte[] key) throws IOException {
public boolean has(byte[] key) {
// first look into the miss cache
if (readMissCache != null) {
if (readMissCache.get(key) != null) {

View File

@ -345,10 +345,15 @@ public class kelondroEcoTable implements kelondroIndex {
return rowdef.newEntry(b);
}
public synchronized boolean has(byte[] key) throws IOException {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
public synchronized boolean has(byte[] key) {
try {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
assert ((table == null) || (table.size() == index.size()));
return index.geti(key) >= 0;
return index.has(key);
}
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {

View File

@ -147,12 +147,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return RAMIndex;
}
public synchronized boolean has(byte[] key) throws IOException {
public synchronized boolean has(byte[] key) {
// it is not recommended to implement or use a has predicate unless
// it can be ensured that it causes no IO
if ((kelondroAbstractRecords.debugmode) && (RAMIndex != true)) serverLog.logWarning("kelondroFlexTable", "RAM index warning in file " + super.tablename);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return index.geti(key) >= 0;
return index.has(key);
}
private kelondroBytesIntMap initializeRamIndex(int initialSpace) {

View File

@ -61,7 +61,7 @@ public interface kelondroIndex {
public int size();
public kelondroProfile profile();
public kelondroRow row();
public boolean has(byte[] key) throws IOException; // use this only if there is no get in case that has returns true
public boolean has(byte[] key); // use this only if there is no get in case that has returns true
public kelondroRow.Entry get(byte[] key) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;

View File

@ -109,7 +109,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
}
public synchronized boolean has(byte[] key) {
return (get(key) != null);
long handle = profile.startRead();
int index = find(key, 0, key.length);
profile.stopRead(handle);
return index >= 0;
}
public synchronized kelondroRow.Entry get(byte[] key) {

View File

@ -135,8 +135,12 @@ public class kelondroSQLTable implements kelondroIndex {
return this.rowdef;
}
public boolean has(byte[] key) throws IOException {
return (get(key) != null);
/**
 * Tells whether a row exists for the given key.
 * The check is done with a full {@code get(key)}; an {@code IOException}
 * raised by that lookup is reported as "not present".
 *
 * @param key the key to look for
 * @return true if {@code get(key)} returned a non-null entry,
 *         false if it returned null or failed with an IOException
 */
public boolean has(byte[] key) {
    boolean present;
    try {
        present = get(key) != null;
    } catch (IOException ignored) {
        // a failed lookup is answered with "not present"
        present = false;
    }
    return present;
}
public ArrayList<kelondroRowCollection> removeDoubles() {

View File

@ -139,7 +139,7 @@ public class kelondroSplitTable implements kelondroIndex {
if (f.isDirectory()) {
// this is a kelonodroFlex table
serverLog.logInfo("kelondroSplitTable", "opening partial flex table " + path);
table = new kelondroCache(new kelondroFlexTable(path, maxf, rowdef, 0, resetOnFail));
table = new kelondroFlexTable(path, maxf, rowdef, 0, resetOnFail);
} else {
serverLog.logInfo("kelondroSplitTable", "opening partial eco table " + f);
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
@ -209,7 +209,7 @@ public class kelondroSplitTable implements kelondroIndex {
return this.rowdef;
}
public boolean has(byte[] key) throws IOException {
/**
 * Tells whether the given key is known, i.e. whether {@code keeperOf(key)}
 * finds something that holds the key.
 *
 * @param key the key to look for
 * @return true if {@code keeperOf(key)} returned a non-null result
 */
public boolean has(byte[] key) {
    final Object keeper = keeperOf(key);
    return keeper != null;
}
@ -276,19 +276,13 @@ public class kelondroSplitTable implements kelondroIndex {
try {
cs.submit(new Callable<kelondroIndex>() {
public kelondroIndex call() {
try {
if (table.has(key)) return table; else return dummyIndex;
} catch (IOException e) {
return dummyIndex;
}
if (table.has(key)) return table; else return dummyIndex;
}
});
} catch (RejectedExecutionException e) {
// the executor is either shutting down or the blocking queue is full
// execute the search direct here without concurrency
try {
if (table.has(key)) return table;
} catch (IOException ee) {}
if (table.has(key)) return table;
rejected++;
}
}

View File

@ -170,7 +170,7 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
n.commit();
}
public boolean has(byte[] key) throws IOException {
/**
 * Not supported by this class: every call fails.
 *
 * @param key ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
public boolean has(byte[] key) {
    final String reason = "has should not be used with kelondroTree.";
    throw new UnsupportedOperationException(reason);
}

View File

@ -358,7 +358,7 @@ public final class plasmaCondenser {
k = it.next();
wsp = words.get(k);
wsp.check(idx);
words.put(k, wsp);
words.put(k, wsp); // is that necessary?
}
}
sentence = new StringBuffer(100);

View File

@ -502,19 +502,8 @@ public final class plasmaWordIndex implements indexRI {
}
}
count = count - containerList.size();
for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
synchronized (ram) {
if (ram.size() == 0) break;
if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
containerList.addAll(ram.bestFlushContainers(count));
// select one word to flush
wordHash = ram.bestFlushWordHash();
// move one container from ram to flush list
if (wordHash == null) c = null; else c = ram.deleteContainer(wordHash);
}
if (c != null) containerList.add(c);
}
// flush the containers
for (indexContainer container : containerList) collections.addEntries(container);
//System.out.println("DEBUG-Finished flush of " + count + " entries from RAM to DB in " + (System.currentTimeMillis() - start) + " milliseconds");