mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
commit
2f5f3f8853
|
@ -20,7 +20,7 @@
|
|||
#(changed)#::<dt></dt><dd><span class="error">You need to restart for some settings to be applied</span></dd>#(/changed)#
|
||||
<dt>Enable Autocrawler:</dt>
|
||||
<dd><input id="autocrawlEnable" name="autocrawlEnable" type="checkbox" #(autocrawlEnable)#::checked="checked"#(/autocrawlEnable)# /></dd>
|
||||
<dt>Deep crawl every:</dt>
|
||||
<dt>Deep crawl every Nth document:</dt>
|
||||
<dd>
|
||||
<input id="autocrawlRatio" name="autocrawlRatio" type="number" min="1" max="500" step="1" size="2" maxlength="2" value="#[autocrawlRatio]#" />
|
||||
Warning: if this is bigger than "Rows to fetch" only shallow crawls will run.
|
||||
|
@ -47,4 +47,4 @@
|
|||
</dl>
|
||||
</form>
|
||||
</fieldset>
|
||||
</body>
|
||||
</body>
|
||||
|
|
|
@ -211,7 +211,7 @@
|
|||
<source>Enable Autocrawler:</source>
|
||||
</trans-unit>
|
||||
<trans-unit id="66a1bd2c" xml:space="preserve" approved="no" translate="yes">
|
||||
<source>Deep crawl every:</source>
|
||||
<source>Deep crawl every Nth document:</source>
|
||||
</trans-unit>
|
||||
<trans-unit id="2291c65d" xml:space="preserve" approved="no" translate="yes">
|
||||
<source>Warning: if this is bigger than "Rows to fetch" only shallow crawls will run.</source>
|
||||
|
|
|
@ -608,12 +608,19 @@ public class CrawlQueues {
|
|||
int i = 0;
|
||||
int deepRatio = Integer.parseInt(this.sb.getConfig(SwitchboardConstants.AUTOCRAWL_RATIO, "50"));
|
||||
for (SolrDocument doc: resp.getResults()) {
|
||||
if (doc == null) {
|
||||
continue;
|
||||
}
|
||||
boolean deep = false;
|
||||
i++;
|
||||
if( i % deepRatio == 0 ){
|
||||
deep = true;
|
||||
}
|
||||
DigestURL url;
|
||||
if (doc.getFieldValue("url_protocol_s") == null || doc.getFieldValue("host_s") == null) {
|
||||
//Skip this document if either of these values is null.
|
||||
continue;
|
||||
}
|
||||
final String u = doc.getFieldValue("url_protocol_s").toString() + "://" + doc.getFieldValue("host_s").toString();
|
||||
try {
|
||||
url = new DigestURL(u);
|
||||
|
|
Loading…
Reference in New Issue
Block a user