do Solr optimization independently of memory and load constraints:

- skipping optimization will likely cause a "too many open files" exception
- without optimization, performance degrades even further, which would also
prevent future optimization runs (a deadlock situation); see the sketch
after this list
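A minimal sketch of the resulting trigger, assuming simplified names
(OptimizeGateSketch, segmentTarget and the helper signatures are illustrative,
not YaCy API): the optimization decision now depends only on the elapsed
interval and admin idle time, not on the memory/load guard that still gates
postprocessing.

    // Hedged sketch of the gate this commit introduces; names are illustrative,
    // the real logic lives in Switchboard.java (see the diff below).
    public class OptimizeGateSketch {

        static boolean optimizeRequired(long deltaOptimizeMs, long idleAdminMs) {
            // optimize at most every 2 hours, and only while the admin has been
            // idle for at least 10 minutes; memory and load are deliberately
            // no longer part of this decision
            return deltaOptimizeMs > 60000L * 60 * 2 && idleAdminMs > 600000L;
        }

        static int segmentTarget(long collectionSize) {
            // one merge segment per 5 million documents, clamped to the range 1..10
            return Math.min(10, Math.max(1, (int) (collectionSize / 5000000L)));
        }

        public static void main(String[] args) {
            System.out.println(optimizeRequired(3L * 60 * 60 * 1000, 11L * 60 * 1000)); // true
            System.out.println(segmentTarget(23000000L)); // 23M docs -> 4
        }
    }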
Michael Peter Christen 2014-04-06 11:04:23 +02:00
parent ebd44a7080
commit df138084c0


@@ -2301,10 +2301,11 @@ public final class Switchboard extends serverSwitch {
     // execute the (post-) processing steps for all entries that have a process tag assigned
     Fulltext fulltext = index.fulltext();
     CollectionConfiguration collection1Configuration = fulltext.getDefaultConfiguration();
+    boolean allCrawlsFinished = this.crawler.allCrawlsFinished(this.crawlQueues);
+    int proccount = 0;
     if (!this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) && MemoryControl.available() > 512L * 1024L * 1024L && Memory.load() < 2.5f) {
         // we optimize first because that is useful for postprocessing
-        int proccount = 0;
         ReferenceReportCache rrCache = index.getReferenceReportCache();
         int clickdepth_maxtime = this.getConfigInt("postprocessing.clickdepth.maxtime", 100);
         int clickdepth_maxdepth = this.getConfigInt("postprocessing.clickdepth.maxdepth", 6);
@@ -2313,7 +2314,6 @@ public final class Switchboard extends serverSwitch {
             this.crawler.getFinishesProfiles(this.crawlQueues) : new HashSet<String>();
         int cleanupByHarvestkey = deletionCandidates.size();
         boolean postprocessing = collection1Configuration.contains(CollectionSchema.process_sxt) && (index.connectedCitation() || fulltext.useWebgraph());
-        boolean allCrawlsFinished = this.crawler.allCrawlsFinished(this.crawlQueues);
         if (postprocessing && (cleanupByHarvestkey > 0 || allCrawlsFinished)) {
             if (cleanupByHarvestkey > 0) {
                 // run postprocessing on these profiles
@@ -2340,6 +2340,10 @@ public final class Switchboard extends serverSwitch {
             }
         }
         this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
+        postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0
+        postprocessingRunning = false;
+    }
     if (allCrawlsFinished) {
         postprocessingRunning = true;
         // flush caches
@@ -2350,25 +2354,20 @@ public final class Switchboard extends serverSwitch {
         long idleSearch = System.currentTimeMillis() - this.localSearchLastAccess;
         long idleAdmin = System.currentTimeMillis() - this.adminAuthenticationLastAccess;
         long deltaOptimize = System.currentTimeMillis() - this.optimizeLastRun;
-        boolean optimizeRequired = deltaOptimize > 60000 * 60 * 3; // 3 hours
+        boolean optimizeRequired = deltaOptimize > 60000 * 60 * 2 && idleAdmin > 600000; // optimize only if the admin has been idle for 10 minutes, and at most every 2 hours
-        int opts = Math.max(1, (int) (fulltext.collectionSize() / 5000000));
+        int opts = Math.min(10, Math.max(1, (int) (fulltext.collectionSize() / 5000000)));
+        log.info("Solr auto-optimization: idleSearch=" + idleSearch + ", idleAdmin=" + idleAdmin + ", deltaOptimize=" + deltaOptimize + ", proccount=" + proccount);
-        if (idleAdmin > 600000) {
-            // only run optimization if the admin is idle (10 minutes)
         if (proccount > 0) {
             opts++; // completed postprocessing forces optimization with one more segment, which is small and quick
             optimizeRequired = true;
         }
-        log.info("Solr auto-optimization: idleSearch=" + idleSearch + ", idleAdmin=" + idleAdmin + ", deltaOptimize=" + deltaOptimize + ", proccount=" + proccount);
         if (optimizeRequired) {
             if (idleSearch < 600000) opts++; // < 10 minutes of search idle time causes an optimization with one more segment, which is small and quick
             log.info("Solr auto-optimization: running solr.optimize(" + opts + ")");
             fulltext.optimize(opts);
             this.optimizeLastRun = System.currentTimeMillis();
         }
-        }
-    }
-    postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0
     postprocessingRunning = false;
 }
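For illustration, a hedged walk-through of the "opts" arithmetic from the last
hunk, with made-up example values (the index size, proccount and idle time are
hypothetical; fulltext.optimize(opts) is what the real code calls):

    // Worked example of the segment-count heuristic, not YaCy code.
    public class OptsExample {
        public static void main(String[] args) {
            long collectionSize = 23000000L;   // hypothetical: 23 million documents
            int opts = Math.min(10, Math.max(1, (int) (collectionSize / 5000000))); // -> 4

            int proccount = 3;            // hypothetical: postprocessing handled documents
            long idleSearchMs = 120000L;  // hypothetical: last local search 2 minutes ago

            if (proccount > 0) opts++;           // 5: one extra small, quick segment
            if (idleSearchMs < 600000L) opts++;  // 6: another one while searches are recent

            System.out.println("solr.optimize(" + opts + ")"); // prints solr.optimize(6)
        }
    }

So a busy peer merges into a few more, smaller segments, which keeps each
optimization run cheap; only a fully idle peer is merged down toward the
minimum segment count.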