re-implemented proxy-busy-check and fixed some other things

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@421 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2005-07-21 11:17:04 +00:00
parent 98d22a73ef
commit c64970fa47
7 changed files with 41 additions and 68 deletions

View File

@ -165,8 +165,14 @@ Error with file input "#[crawlingStart]#": #[error]#
Set new prefetch depth to "#[newproxyPrefetchDepth]#"
::
Crawling of "#[crawlingURL]#" started.
You can monitor the crawling progress with this page.
<b>Please wait some seconds before refreshing this page, because the request is enqueued and delayed until the http server is idle for a certain time.</b>
You can monitor the crawling progress either by watching the URL queues
(<a href="/IndexCreateWWWLocalQueue_p.html">local queue</a>,
<a href="/IndexCreateWWWGlobalQueue_p.html">global queue</a>,
<a href="/IndexCreateLoaderQueue_p.html">loader queue</a>,
<a href="/IndexCreateIndexingQueue_p.html">indexing queue</a>)
or see the fill/process count of all queues on the
<a href="/Performance_p.html">performance page</a>.
<b>Please wait some seconds, because the request is enqueued and delayed until the http server is idle for a certain time.</b>
The indexing result is presented on the
<a href="IndexMonitor.html">Index Monitor</a>-page.
<b>It will take at least 30 seconds until the first result appears there. Please be patient, the crawling will pause each time you use the proxy or web server to ensure maximum availability.</b>

View File

@ -54,7 +54,7 @@
<tr class="TableCellLight">
<td class="small" align="left" colspan="18">
<input type="submit" name="submitdelay" value="Submit New Delay Values">&nbsp;&nbsp;&nbsp;
<input type="submit" name="submitdefault" value="Submit Default Values">&nbsp;&nbsp;&nbsp;
<input type="submit" name="submitdefault" value="Reset To Default Values">&nbsp;&nbsp;&nbsp;
Changes take effect immediately</td>
</tr>
</form>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 836 B

After

Width:  |  Height:  |  Size: 824 B

View File

@ -272,6 +272,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
Date requestDate = new Date(); // remember the time...
this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
switchboard.proxyLastAccess = System.currentTimeMillis();
// using an ByteCount OutputStream to count the send bytes (needed for the logfile)
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2);
@ -786,6 +787,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String args = conProp.getProperty("ARGS"); // may be null if no args were given
String httpVer = conProp.getProperty(httpd.CONNECTION_PROP_HTTP_VER);
switchboard.proxyLastAccess = System.currentTimeMillis();
int port;
int pos;
if ((pos = host.indexOf(":")) < 0) {
@ -866,6 +869,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// remembering the starting time of the request
Date requestDate = new Date(); // remember the time...
this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
switchboard.proxyLastAccess = System.currentTimeMillis();
// using an ByteCount OutputStream to count the send bytes
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2);
@ -953,6 +957,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
this.connectionProperties = conProp;
switchboard.proxyLastAccess = System.currentTimeMillis();
String host = conProp.getProperty("HOST");
int port = Integer.parseInt(conProp.getProperty("PORT"));

View File

@ -74,7 +74,6 @@ import de.anomic.tools.enumerateFiles;
public final class plasmaHTCache {
private static final int stackLimit = 150; // if we exceed that limit, we do not check idle
private static final long idleDelay = 2000; // 2 seconds no hits until we think that we idle
public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
private kelondroMap responseHeaderDB = null;
@ -82,20 +81,9 @@ public final class plasmaHTCache {
private final TreeMap cacheAge; // a <date+hash, cache-path> - relation
public long currCacheSize;
public long maxCacheSize;
private long lastAcc;
public final File cachePath;
public static serverLog log;
/*
public static final int CACHE_UNFILLED = 0; // default case without assignment
public static final int CACHE_FILL = 1; // this means: update == true
public static final int CACHE_HIT = 2; // the best case: reading from Cache
public static final int CACHE_STALE_NO_RELOAD = 3; // this shall be treated as a rare case that should not appear
public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true
public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content
public static final int CACHE_PASSING = 6; // does not touch cache, just passing
*/
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
//this.switchboard = switchboard;
@ -129,9 +117,6 @@ public final class plasmaHTCache {
// init stack
cacheStack = new LinkedList();
// init idle check
lastAcc = System.currentTimeMillis();
// init cache age and size management
cacheAge = new TreeMap();
currCacheSize = 0;
@ -299,10 +284,6 @@ public final class plasmaHTCache {
return new httpHeader(null, hdb);
}
public boolean idle() {
    // The cache counts as idle once no access has been recorded
    // for at least idleDelay milliseconds.
    final long quietUntil = lastAcc + idleDelay;
    return System.currentTimeMillis() > quietUntil;
}
public boolean full() {
    // The stack is considered full as soon as it grows beyond stackLimit entries.
    final int pending = cacheStack.size();
    return pending > stackLimit;
}
@ -415,20 +396,6 @@ public final class plasmaHTCache {
}
}
/*
public void saveResource(URL url, byte[] resource) {
File f = getCachePath(url);
f.getParentFile().mkdirs();
FileOutputStream fos = null;
try {
fos = new FileOutputStream(f);
htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
} finally {
if (fos!=null)try{fos.close();}catch(Exception e){}
}
}
*/
public static boolean isPOST(String urlString) {
return ((urlString.indexOf("?") >= 0) ||
(urlString.indexOf("&") >= 0));

View File

@ -180,6 +180,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public kelondroTables facilityDB;
public plasmaParser parser;
public plasmaWordIndexClassicCacheMigration classicCache;
public long proxyLastAccess;
private serverSemaphore shutdownSync = new serverSemaphore(0);
private boolean terminate = false;
@ -209,7 +210,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
remoteProxyHost = null;
remoteProxyPort = 0;
}
proxyLastAccess = 0;
if (!(listsPath.exists())) listsPath.mkdirs();
@ -404,6 +405,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log.logSystem("Finished Switchboard Initialization");
}
public boolean onlineCaution() {
    // True while the proxy has been used within the last 30 seconds;
    // callers use this to pause background work so the proxy stays responsive.
    final long sinceLastProxyUse = System.currentTimeMillis() - proxyLastAccess;
    return sinceLastProxyUse < 30000L;
}
private static String ppRamString(int bytes) {
if (bytes < 1024) return bytes + " KByte";
bytes = bytes / 1024;
@ -557,7 +562,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public boolean deQueue() {
// work off fresh entries from the proxy or from the crawler
if (onlineCaution()) {
log.logDebug("deQueue: online caution, omitting resource stack processing");
return false;
}
plasmaSwitchboardQueue.Entry nextentry;
synchronized (sbQueue) {
if (sbQueue.size() == 0) {
@ -565,12 +573,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
return false; // nothing to do
}
// in case that the server is very busy we do not work off the queue too fast
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {}
// do one processing step
log.logDebug("DEQUEUE: cacheManager=" + ((cacheManager.idle()) ? "idle" : "busy") +
", sbQueueSize=" + sbQueue.size() +
log.logDebug("DEQUEUE: sbQueueSize=" + sbQueue.size() +
", coreStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) +
", limitStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) +
", overhangStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) +
@ -666,7 +670,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
"cacheLoader=" + cacheLoader.size() + ")");
return false;
}
if (onlineCaution()) {
log.logDebug("CoreCrawl: online caution, omitting processing");
return false;
}
// if the server is busy, we do crawling more slowly
//if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
@ -797,21 +804,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
//log.logDebug("GlobalCrawl: queue is empty");
return false;
}
/*
if (queueStack.size() > 0) {
log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
"processStack=" + queueStack.size() + ")");
if (onlineCaution()) {
log.logDebug("GlobalCrawl: online caution, omitting processing");
return false;
}
if (noticeURL.coreStackSize() > 0) {
log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" +
"coreStackSize=" + noticeURL.coreStackSize() + ")");
return false;
}
*/
// if the server is busy, we do this more slowly
//if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
// if crawling was paused we have to wait until we wer notified to continue
synchronized(this.crawlingPausedSync) {

View File

@ -125,18 +125,17 @@ public final class plasmaWordIndexAssortmentCluster {
return;
}
// calculate appropriate cluster insert point
int clusterStart = clusterCount;
if ((((byte) wordHash.charAt(0)) & 1) == 1) {
// for every second hash, place the entries in the middle of the assortments
// this balances the entries within the assortments-cluster
// calculate minimum cluster insert point
int clusterMinStart = clusterCount;
int cap = clusterCapacity - newContainer.size() - 2 * clusterCount;
while (cap > 0) {
cap -= clusterStart;
clusterStart--;
}
cap -= clusterMinStart;
clusterMinStart--;
}
// point the real cluster insert point somewhere between the minimum and the maximum
int clusterStart = clusterCount - (int) (Math.random() * (clusterCount - clusterMinStart));
// do the insert
plasmaWordIndexEntryContainer c;
Iterator i = newContainer.entries();