mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
re-implemented proxy-busy-check and fixed some other things
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@421 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
98d22a73ef
commit
c64970fa47
|
@ -165,8 +165,14 @@ Error with file input "#[crawlingStart]#": #[error]#
|
|||
Set new prefetch depth to "#[newproxyPrefetchDepth]#"
|
||||
::
|
||||
Crawling of "#[crawlingURL]#" started.
|
||||
You can monitor the crawling progress with this page.
|
||||
<b>Please wait some seconds before refresh of this page, because the request is enqueued and delayed until the http server is idle for a certain time.</b>
|
||||
You can monitor the crawling progress either by watching the URL queues
|
||||
(<a href="/IndexCreateWWWLocalQueue_p.html">local queue</a>,
|
||||
<a href="/IndexCreateWWWGlobalQueue_p.html">global queue</a>,
|
||||
<a href="/IndexCreateLoaderQueue_p.html">loader queue</a>,
|
||||
<a href="/IndexCreateIndexingQueue_p.html">indexing queue</a>)
|
||||
or see the fill/process count of all queues on the
|
||||
<a href="/Performance_p.html">performance page</a>.
|
||||
<b>Please wait some seconds, because the request is enqueued and delayed until the http server is idle for a certain time.</b>
|
||||
The indexing result is presented on the
|
||||
<a href="IndexMonitor.html">Index Monitor</a>-page.
|
||||
<b>It will take at least 30 seconds until the first result appears there. Please be patient, the crawling will pause each time you use the proxy or web server to ensure maximum availability.</b>
|
||||
|
|
|
@ -54,7 +54,7 @@
|
|||
<tr class="TableCellLight">
|
||||
<td class="small" align="left" colspan="18">
|
||||
<input type="submit" name="submitdelay" value="Submit New Delay Values">
|
||||
<input type="submit" name="submitdefault" value="Submit Default Values">
|
||||
<input type="submit" name="submitdefault" value="Reset To Default Values">
|
||||
Changes take effect immediately</td>
|
||||
</tr>
|
||||
</form>
|
||||
|
|
BIN
htroot/env/grafics/notifier.gif
vendored
BIN
htroot/env/grafics/notifier.gif
vendored
Binary file not shown.
Before Width: | Height: | Size: 836 B After Width: | Height: | Size: 824 B |
|
@ -272,6 +272,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
Date requestDate = new Date(); // remember the time...
|
||||
this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
|
||||
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
|
||||
switchboard.proxyLastAccess = System.currentTimeMillis();
|
||||
|
||||
// using a ByteCount OutputStream to count the sent bytes (needed for the logfile)
|
||||
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2);
|
||||
|
@ -786,6 +787,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
String args = conProp.getProperty("ARGS"); // may be null if no args were given
|
||||
String httpVer = conProp.getProperty(httpd.CONNECTION_PROP_HTTP_VER);
|
||||
|
||||
switchboard.proxyLastAccess = System.currentTimeMillis();
|
||||
|
||||
int port;
|
||||
int pos;
|
||||
if ((pos = host.indexOf(":")) < 0) {
|
||||
|
@ -866,6 +869,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
// remembering the starting time of the request
|
||||
Date requestDate = new Date(); // remember the time...
|
||||
this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
|
||||
switchboard.proxyLastAccess = System.currentTimeMillis();
|
||||
|
||||
// using a ByteCount OutputStream to count the sent bytes
|
||||
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2);
|
||||
|
@ -953,6 +957,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
|
||||
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
|
||||
this.connectionProperties = conProp;
|
||||
switchboard.proxyLastAccess = System.currentTimeMillis();
|
||||
|
||||
String host = conProp.getProperty("HOST");
|
||||
int port = Integer.parseInt(conProp.getProperty("PORT"));
|
||||
|
|
|
@ -74,7 +74,6 @@ import de.anomic.tools.enumerateFiles;
|
|||
public final class plasmaHTCache {
|
||||
|
||||
private static final int stackLimit = 150; // if we exceed that limit, we do not check idle
|
||||
private static final long idleDelay = 2000; // 2 seconds no hits until we think that we idle
|
||||
public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
|
||||
|
||||
private kelondroMap responseHeaderDB = null;
|
||||
|
@ -82,20 +81,9 @@ public final class plasmaHTCache {
|
|||
private final TreeMap cacheAge; // a <date+hash, cache-path> - relation
|
||||
public long currCacheSize;
|
||||
public long maxCacheSize;
|
||||
private long lastAcc;
|
||||
public final File cachePath;
|
||||
public static serverLog log;
|
||||
|
||||
/*
|
||||
public static final int CACHE_UNFILLED = 0; // default case without assignment
|
||||
public static final int CACHE_FILL = 1; // this means: update == true
|
||||
public static final int CACHE_HIT = 2; // the best case: reading from Cache
|
||||
public static final int CACHE_STALE_NO_RELOAD = 3; // this shall be treated as a rare case that should not appear
|
||||
public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true
|
||||
public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content
|
||||
public static final int CACHE_PASSING = 6; // does not touch cache, just passing
|
||||
*/
|
||||
|
||||
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
|
||||
//this.switchboard = switchboard;
|
||||
|
||||
|
@ -129,10 +117,7 @@ public final class plasmaHTCache {
|
|||
// init stack
|
||||
cacheStack = new LinkedList();
|
||||
|
||||
// init idle check
|
||||
lastAcc = System.currentTimeMillis();
|
||||
|
||||
// init cache age and size management
|
||||
// init cache age and size management
|
||||
cacheAge = new TreeMap();
|
||||
currCacheSize = 0;
|
||||
this.maxCacheSize = maxCacheSize;
|
||||
|
@ -299,10 +284,6 @@ public final class plasmaHTCache {
|
|||
return new httpHeader(null, hdb);
|
||||
}
|
||||
|
||||
public boolean idle() {
|
||||
return (System.currentTimeMillis() > (idleDelay + lastAcc));
|
||||
}
|
||||
|
||||
public boolean full() {
|
||||
return (cacheStack.size() > stackLimit);
|
||||
}
|
||||
|
@ -415,20 +396,6 @@ public final class plasmaHTCache {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public void saveResource(URL url, byte[] resource) {
|
||||
File f = getCachePath(url);
|
||||
f.getParentFile().mkdirs();
|
||||
FileOutputStream fos = null;
|
||||
try {
|
||||
fos = new FileOutputStream(f);
|
||||
htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
|
||||
} finally {
|
||||
if (fos!=null)try{fos.close();}catch(Exception e){}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
public static boolean isPOST(String urlString) {
|
||||
return ((urlString.indexOf("?") >= 0) ||
|
||||
(urlString.indexOf("&") >= 0));
|
||||
|
|
|
@ -180,6 +180,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
public kelondroTables facilityDB;
|
||||
public plasmaParser parser;
|
||||
public plasmaWordIndexClassicCacheMigration classicCache;
|
||||
public long proxyLastAccess;
|
||||
|
||||
private serverSemaphore shutdownSync = new serverSemaphore(0);
|
||||
private boolean terminate = false;
|
||||
|
@ -209,7 +210,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
remoteProxyHost = null;
|
||||
remoteProxyPort = 0;
|
||||
}
|
||||
|
||||
proxyLastAccess = 0;
|
||||
|
||||
if (!(listsPath.exists())) listsPath.mkdirs();
|
||||
|
||||
|
@ -404,6 +405,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
log.logSystem("Finished Switchboard Initialization");
|
||||
}
|
||||
|
||||
// Returns true while fewer than 30000 milliseconds (30 seconds) have elapsed since the
// timestamp stored in proxyLastAccess; returns false otherwise.
public boolean onlineCaution() {
|
||||
return System.currentTimeMillis() - proxyLastAccess < 30000;
|
||||
}
|
||||
|
||||
private static String ppRamString(int bytes) {
|
||||
if (bytes < 1024) return bytes + " KByte";
|
||||
bytes = bytes / 1024;
|
||||
|
@ -557,7 +562,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
|
||||
public boolean deQueue() {
|
||||
// work off fresh entries from the proxy or from the crawler
|
||||
|
||||
if (onlineCaution()) {
|
||||
log.logDebug("deQueue: online caution, omitting resource stack processing");
|
||||
return false;
|
||||
}
|
||||
plasmaSwitchboardQueue.Entry nextentry;
|
||||
synchronized (sbQueue) {
|
||||
if (sbQueue.size() == 0) {
|
||||
|
@ -565,12 +573,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
return false; // nothing to do
|
||||
}
|
||||
|
||||
// in case that the server is very busy we do not work off the queue too fast
|
||||
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {}
|
||||
|
||||
// do one processing step
|
||||
log.logDebug("DEQUEUE: cacheManager=" + ((cacheManager.idle()) ? "idle" : "busy") +
|
||||
", sbQueueSize=" + sbQueue.size() +
|
||||
log.logDebug("DEQUEUE: sbQueueSize=" + sbQueue.size() +
|
||||
", coreStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) +
|
||||
", limitStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) +
|
||||
", overhangStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) +
|
||||
|
@ -666,7 +670,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
"cacheLoader=" + cacheLoader.size() + ")");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (onlineCaution()) {
|
||||
log.logDebug("CoreCrawl: online caution, omitting processing");
|
||||
return false;
|
||||
}
|
||||
// if the server is busy, we do crawling more slowly
|
||||
//if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
|
||||
|
||||
|
@ -797,21 +804,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
//log.logDebug("GlobalCrawl: queue is empty");
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
if (queueStack.size() > 0) {
|
||||
log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
|
||||
"processStack=" + queueStack.size() + ")");
|
||||
if (onlineCaution()) {
|
||||
log.logDebug("GlobalCrawl: online caution, omitting processing");
|
||||
return false;
|
||||
}
|
||||
if (noticeURL.coreStackSize() > 0) {
|
||||
log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" +
|
||||
"coreStackSize=" + noticeURL.coreStackSize() + ")");
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
||||
// if the server is busy, we do this more slowly
|
||||
//if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
|
||||
|
||||
// if crawling was paused we have to wait until we were notified to continue
|
||||
synchronized(this.crawlingPausedSync) {
|
||||
|
|
|
@ -125,18 +125,17 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
return;
|
||||
}
|
||||
|
||||
// calculate appropriate cluster insert point
|
||||
int clusterStart = clusterCount;
|
||||
if ((((byte) wordHash.charAt(0)) & 1) == 1) {
|
||||
// for every second hash, place the entries in the middle of the assortments
|
||||
// this balances the entries within the assortments-cluster
|
||||
int cap = clusterCapacity - newContainer.size() - 2 * clusterCount;
|
||||
while (cap > 0) {
|
||||
cap -= clusterStart;
|
||||
clusterStart--;
|
||||
}
|
||||
// calculate minimum cluster insert point
|
||||
int clusterMinStart = clusterCount;
|
||||
int cap = clusterCapacity - newContainer.size() - 2 * clusterCount;
|
||||
while (cap > 0) {
|
||||
cap -= clusterMinStart;
|
||||
clusterMinStart--;
|
||||
}
|
||||
|
||||
// point the real cluster insert point somewhere between the minimum and the maximum
|
||||
int clusterStart = clusterCount - (int) (Math.random() * (clusterCount - clusterMinStart));
|
||||
|
||||
// do the insert
|
||||
plasmaWordIndexEntryContainer c;
|
||||
Iterator i = newContainer.entries();
|
||||
|
|
Loading…
Reference in New Issue
Block a user