mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Removed the exists() retrieval functions from the error cache and replaced them
with metadata retrieval directly from the connectors. This should make better use of the cache. The metadata cache size is now increased automatically when more memory is available.
This commit is contained in:
parent
62c72360ee
commit
b5fc2b63ea
|
@ -59,7 +59,6 @@ import net.yacy.search.index.Fulltext;
|
||||||
import net.yacy.search.index.Segment.ReferenceReport;
|
import net.yacy.search.index.Segment.ReferenceReport;
|
||||||
import net.yacy.search.index.Segment.ReferenceReportCache;
|
import net.yacy.search.index.Segment.ReferenceReportCache;
|
||||||
import net.yacy.search.query.QueryParams;
|
import net.yacy.search.query.QueryParams;
|
||||||
import net.yacy.search.schema.CollectionConfiguration.FailDoc;
|
|
||||||
import net.yacy.search.schema.CollectionSchema;
|
import net.yacy.search.schema.CollectionSchema;
|
||||||
import net.yacy.server.serverObjects;
|
import net.yacy.server.serverObjects;
|
||||||
import net.yacy.server.serverSwitch;
|
import net.yacy.server.serverSwitch;
|
||||||
|
@ -466,7 +465,7 @@ public class HostBrowser {
|
||||||
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
|
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
|
||||||
StoreType type = (StoreType) entry.getValue();
|
StoreType type = (StoreType) entry.getValue();
|
||||||
try {uri = new DigestURL(entry.getKey());} catch (final MalformedURLException e) {uri = null;}
|
try {uri = new DigestURL(entry.getKey());} catch (final MalformedURLException e) {uri = null;}
|
||||||
HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash(), true);
|
HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors
|
||||||
boolean loading = load.equals(entry.getKey()) || (process != null && process != HarvestProcess.ERRORS);
|
boolean loading = load.equals(entry.getKey()) || (process != null && process != HarvestProcess.ERRORS);
|
||||||
boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED || type == StoreType.FAILED;
|
boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED || type == StoreType.FAILED;
|
||||||
boolean dc = type != StoreType.INDEX && !error && !loading && list.containsKey(entry.getKey() + "/");
|
boolean dc = type != StoreType.INDEX && !error && !loading && list.containsKey(entry.getKey() + "/");
|
||||||
|
@ -482,8 +481,8 @@ public class HostBrowser {
|
||||||
FailType failType = errorDocs.get(entry.getKey());
|
FailType failType = errorDocs.get(entry.getKey());
|
||||||
if (failType == null) {
|
if (failType == null) {
|
||||||
// maybe this is only in the errorURL
|
// maybe this is only in the errorURL
|
||||||
FailDoc faildoc = sb.crawlQueues.errorURL.get(ASCII.String(uri.hash()));
|
//Metadata faildoc = sb.index.fulltext().getDefaultConnector().getMetadata(ASCII.String(uri.hash()));
|
||||||
prop.putHTML("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS && faildoc != null ? faildoc.getFailReason() : "unknown error");
|
prop.putHTML("files_list_" + c + "_type_stored_error", "unknown error");
|
||||||
} else {
|
} else {
|
||||||
String ids = ASCII.String(uri.hash());
|
String ids = ASCII.String(uri.hash());
|
||||||
InfoCacheEntry ice = infoCache.get(ids);
|
InfoCacheEntry ice = infoCache.get(ids);
|
||||||
|
|
|
@ -32,6 +32,7 @@ import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
|
||||||
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
|
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
|
||||||
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
|
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
|
||||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||||
|
import net.yacy.kelondro.util.MemoryControl;
|
||||||
|
|
||||||
public class InstanceMirror {
|
public class InstanceMirror {
|
||||||
|
|
||||||
|
@ -165,7 +166,8 @@ public class InstanceMirror {
|
||||||
if (msc != null) return msc;
|
if (msc != null) return msc;
|
||||||
EmbeddedSolrConnector esc = getEmbeddedConnector(corename);
|
EmbeddedSolrConnector esc = getEmbeddedConnector(corename);
|
||||||
RemoteSolrConnector rsc = getRemoteConnector(corename);
|
RemoteSolrConnector rsc = getRemoteConnector(corename);
|
||||||
msc = new ConcurrentUpdateSolrConnector(new MirrorSolrConnector(esc, rsc), RemoteInstance.queueSizeByMemory(), 10000, Runtime.getRuntime().availableProcessors());
|
int cacheSize = (int) (MemoryControl.available() / 30000); // will return about 10000 for standard ram size
|
||||||
|
msc = new ConcurrentUpdateSolrConnector(new MirrorSolrConnector(esc, rsc), RemoteInstance.queueSizeByMemory(), cacheSize, Runtime.getRuntime().availableProcessors());
|
||||||
//msc = new MirrorSolrConnector(esc, rsc);
|
//msc = new MirrorSolrConnector(esc, rsc);
|
||||||
this.mirrorConnectorCache.put(corename, msc);
|
this.mirrorConnectorCache.put(corename, msc);
|
||||||
return msc;
|
return msc;
|
||||||
|
|
|
@ -43,6 +43,7 @@ import net.yacy.cora.document.id.AnchorURL;
|
||||||
import net.yacy.cora.document.id.DigestURL;
|
import net.yacy.cora.document.id.DigestURL;
|
||||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||||
import net.yacy.cora.federate.solr.FailCategory;
|
import net.yacy.cora.federate.solr.FailCategory;
|
||||||
|
import net.yacy.cora.federate.solr.connector.SolrConnector.Metadata;
|
||||||
import net.yacy.cora.order.Base64Order;
|
import net.yacy.cora.order.Base64Order;
|
||||||
import net.yacy.cora.protocol.Domains;
|
import net.yacy.cora.protocol.Domains;
|
||||||
import net.yacy.cora.protocol.ftp.FTPClient;
|
import net.yacy.cora.protocol.ftp.FTPClient;
|
||||||
|
@ -60,7 +61,6 @@ import net.yacy.repository.Blacklist.BlacklistType;
|
||||||
import net.yacy.repository.FilterEngine;
|
import net.yacy.repository.FilterEngine;
|
||||||
import net.yacy.search.Switchboard;
|
import net.yacy.search.Switchboard;
|
||||||
import net.yacy.search.index.Segment;
|
import net.yacy.search.index.Segment;
|
||||||
import net.yacy.search.schema.CollectionConfiguration;
|
|
||||||
|
|
||||||
public final class CrawlStacker {
|
public final class CrawlStacker {
|
||||||
|
|
||||||
|
@ -379,22 +379,27 @@ public final class CrawlStacker {
|
||||||
public String checkAcceptanceInitially(final DigestURL url, final CrawlProfile profile) {
|
public String checkAcceptanceInitially(final DigestURL url, final CrawlProfile profile) {
|
||||||
|
|
||||||
// check if the url is double registered
|
// check if the url is double registered
|
||||||
final HarvestProcess dbocc = this.nextQueue.exists(url.hash(), false); // returns the name of the queue if entry exists
|
final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists
|
||||||
if (dbocc != null) {
|
if (dbocc != null) {
|
||||||
return "double in: " + dbocc.name();
|
return "double in: " + dbocc.name();
|
||||||
}
|
}
|
||||||
String urlhash = ASCII.String(url.hash());
|
String urlhash = ASCII.String(url.hash());
|
||||||
final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(urlhash);
|
Metadata oldEntry = null;
|
||||||
final Date oldDate = errorEntry == null ? null : errorEntry.getFailDate();
|
try {
|
||||||
|
oldEntry = this.indexSegment.fulltext().getDefaultConnector().getMetadata(urlhash);
|
||||||
|
} catch (IOException e) {
|
||||||
|
ConcurrentLog.logException(e);
|
||||||
|
}
|
||||||
|
final Long oldDate = oldEntry == null ? null : oldEntry.date;
|
||||||
if (oldDate == null) {
|
if (oldDate == null) {
|
||||||
return null; // no evidence that we know that url
|
return null; // no evidence that we know that url
|
||||||
}
|
}
|
||||||
final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime();
|
final boolean recrawl = profile.recrawlIfOlder() > oldDate.longValue();
|
||||||
final String urlstring = url.toString();
|
final String urlstring = url.toString();
|
||||||
if (recrawl) {
|
if (recrawl) {
|
||||||
if (CrawlStacker.log.isInfo())
|
if (CrawlStacker.log.isInfo())
|
||||||
CrawlStacker.log.info("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
|
CrawlStacker.log.info("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
|
||||||
((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago.");
|
((System.currentTimeMillis() - oldDate.longValue()) / 60000 / 60 / 24) + " days ago.");
|
||||||
} else {
|
} else {
|
||||||
return "double in: LURL-DB, oldDate = " + oldDate.toString();
|
return "double in: LURL-DB, oldDate = " + oldDate.toString();
|
||||||
}
|
}
|
||||||
|
|
|
@ -143,7 +143,7 @@ public class CrawlQueues {
|
||||||
* @param hash
|
* @param hash
|
||||||
* @return if the hash exists, the name of the database is returned, otherwise null is returned
|
* @return if the hash exists, the name of the database is returned, otherwise null is returned
|
||||||
*/
|
*/
|
||||||
public HarvestProcess exists(final byte[] hash, final boolean checkErrorCache) {
|
public HarvestProcess exists(final byte[] hash) {
|
||||||
if (this.delegatedURL.containsKey(ASCII.String(hash))) {
|
if (this.delegatedURL.containsKey(ASCII.String(hash))) {
|
||||||
return HarvestProcess.DELEGATED;
|
return HarvestProcess.DELEGATED;
|
||||||
}
|
}
|
||||||
|
@ -155,9 +155,6 @@ public class CrawlQueues {
|
||||||
return HarvestProcess.WORKER;
|
return HarvestProcess.WORKER;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (checkErrorCache && this.errorURL.exists(hash)) {
|
|
||||||
return HarvestProcess.ERRORS;
|
|
||||||
}
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -98,6 +98,7 @@ import net.yacy.cora.document.id.MultiProtocolURL;
|
||||||
import net.yacy.cora.federate.solr.FailCategory;
|
import net.yacy.cora.federate.solr.FailCategory;
|
||||||
import net.yacy.cora.federate.solr.Ranking;
|
import net.yacy.cora.federate.solr.Ranking;
|
||||||
import net.yacy.cora.federate.solr.SchemaConfiguration;
|
import net.yacy.cora.federate.solr.SchemaConfiguration;
|
||||||
|
import net.yacy.cora.federate.solr.connector.SolrConnector.Metadata;
|
||||||
import net.yacy.cora.federate.solr.instance.RemoteInstance;
|
import net.yacy.cora.federate.solr.instance.RemoteInstance;
|
||||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||||
import net.yacy.cora.order.Base64Order;
|
import net.yacy.cora.order.Base64Order;
|
||||||
|
@ -1616,7 +1617,16 @@ public final class Switchboard extends serverSwitch {
|
||||||
*/
|
*/
|
||||||
public HarvestProcess urlExists(final String hash) {
|
public HarvestProcess urlExists(final String hash) {
|
||||||
if (this.index.getLoadTime(hash) >= 0) return HarvestProcess.LOADED;
|
if (this.index.getLoadTime(hash) >= 0) return HarvestProcess.LOADED;
|
||||||
return this.crawlQueues.exists(ASCII.getBytes(hash), true);
|
HarvestProcess hp = this.crawlQueues.exists(ASCII.getBytes(hash));
|
||||||
|
if (hp != null) return hp;
|
||||||
|
try {
|
||||||
|
Metadata md = this.index.fulltext().getDefaultConnector().getMetadata(hash);
|
||||||
|
if (md == null) return null;
|
||||||
|
return HarvestProcess.LOADED; // todo: can also be in error
|
||||||
|
} catch (IOException e) {
|
||||||
|
ConcurrentLog.logException(e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void urlRemove(final Segment segment, final byte[] hash) {
|
public void urlRemove(final Segment segment, final byte[] hash) {
|
||||||
|
|
|
@ -168,6 +168,7 @@ public class ErrorCache {
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
public CollectionConfiguration.FailDoc get(final String urlhash) {
|
public CollectionConfiguration.FailDoc get(final String urlhash) {
|
||||||
CollectionConfiguration.FailDoc failDoc = null;
|
CollectionConfiguration.FailDoc failDoc = null;
|
||||||
synchronized (this.cache) {
|
synchronized (this.cache) {
|
||||||
|
@ -185,7 +186,6 @@ public class ErrorCache {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean exists(final byte[] urlHash) {
|
public boolean exists(final byte[] urlHash) {
|
||||||
String urlHashString = ASCII.String(urlHash);
|
String urlHashString = ASCII.String(urlHash);
|
||||||
try {
|
try {
|
||||||
|
@ -200,7 +200,7 @@ public class ErrorCache {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
public void clearStack() {
|
public void clearStack() {
|
||||||
synchronized (this.cache) {
|
synchronized (this.cache) {
|
||||||
this.cache.clear();
|
this.cache.clear();
|
||||||
|
|
Loading…
Reference in New Issue
Block a user