added new default profiles to distinguish snippet fetch for local and global search

the difference is that a local search will not cause a re-indexing of loaded pages

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4731 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-04-24 08:42:08 +00:00
parent 2c0c8f0f0c
commit e024e3b9cf
19 changed files with 87 additions and 53 deletions

View File

@ -200,7 +200,7 @@ public class Bookmarks {
plasmaParserDocument document = null;
if (urlentry != null) {
indexURLReference.Components comp = urlentry.comp();
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true);
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true, false);
prop.put("mode_edit", "0"); // create mode
prop.put("mode_url", comp.url().toNormalform(false, true));
prop.putHTML("mode_title", comp.dc_title());

View File

@ -115,8 +115,10 @@ public class IndexCreateWWWLocalQueue_p {
final String name = entry.name();
if (name.equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
name.equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
name.equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
name.equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
name.equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ||
name.equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ||
name.equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ||
name.equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA))
continue;
if (compiledPattern.matcher(name).find()) {
sb.profilesActiveCrawls.removeEntry(entry.handle());

View File

@ -166,7 +166,7 @@ public class ViewFile {
if (resource == null) {
plasmaHTCache.Entry entry = null;
try {
entry = sb.crawlQueues.loadResourceFromWeb(url, 5000, false, true);
entry = sb.crawlQueues.loadResourceFromWeb(url, 5000, false, true, false);
} catch (Exception e) {
prop.put("error", "4");
prop.putHTML("error_errorText", e.getMessage());

View File

@ -98,7 +98,7 @@ public class ViewImage {
// getting the image as stream
Image scaled = iconcache.get(urlString);
if (scaled == null) {
Object[] resource = plasmaSnippetCache.getResource(url, true, timeout, false);
Object[] resource = plasmaSnippetCache.getResource(url, true, timeout, false, true);
byte[] imgb = null;
if (resource == null) {
if (urlString.endsWith(".ico")) {

View File

@ -37,8 +37,10 @@ public class WatchWebStructure_p {
e = it.next();
if (e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA))
continue;
host = e.name();
break; // take the first one

View File

@ -200,7 +200,7 @@ public class ymarks {
plasmaParserDocument document = null;
if (urlentry != null) {
indexURLReference.Components comp = urlentry.comp();
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true);
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true, true);
prop.put("mode_edit", "0"); // create mode
prop.put("mode_url", comp.url().toNormalform(false, true));
prop.putHTML("mode_title", comp.dc_title());

View File

@ -53,7 +53,7 @@ public class sidebar_history {
if (visibleQueries.contains(query.queryString)) continue; // avoid doubles
visibleQueries.add(query.queryString);
prop.put("history_list_" + c + "_querystring", query.queryString);
prop.put("history_list_" + c + "_searchdom", query.searchdom());
prop.put("history_list_" + c + "_searchdom", ((query.isLocal()) ? "local" : "global"));
prop.put("history_list_" + c + "_contentdom", query.contentdom());
c++;
if (c >= 10) break;

View File

@ -117,7 +117,7 @@ public class sidebar_navigation {
prop.put("navigation_topwords_words_" + hintcount + "_count", theQuery.displayResults());
prop.put("navigation_topwords_words_" + hintcount + "_offset", "0");
prop.put("navigation_topwords_words_" + hintcount + "_contentdom", theQuery.contentdom());
prop.put("navigation_topwords_words_" + hintcount + "_resource", theQuery.searchdom());
prop.put("navigation_topwords_words_" + hintcount + "_resource", ((theQuery.isLocal()) ? "local" : "global"));
prop.put("navigation_topwords_words_" + hintcount + "_zonecode", theQuery.zonecode);
}
hintcount++;
@ -182,7 +182,7 @@ public class sidebar_navigation {
"<a href=\"ysearch.html?search=" + theQuery.queryString() +
"&amp;count="+ theQuery.displayResults() +
"&amp;offset=" + (page * theQuery.displayResults()) +
"&amp;resource=" + theQuery.searchdom() +
"&amp;resource=" + ((theQuery.isLocal()) ? "local" : "global") +
"&amp;urlmaskfilter=" + theQuery.urlMask +
"&amp;prefermaskfilter=" + theQuery.prefer +
"&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
@ -195,7 +195,7 @@ public class sidebar_navigation {
prop.putHTML("navigation_languagezone_" + zonename + "_search", theQuery.queryString.replace(' ', '+'));
prop.put("navigation_languagezone_" + zonename + "_offset", "0");
prop.put("navigation_languagezone_" + zonename + "_contentdom", theQuery.contentdom());
prop.put("navigation_languagezone_" + zonename + "_resource", theQuery.searchdom());
prop.put("navigation_languagezone_" + zonename + "_resource", ((theQuery.isLocal()) ? "local" : "global"));
prop.put("navigation_languagezone_" + zonename, 1);
}

View File

@ -222,7 +222,7 @@ public class yacysearch {
if (urlentry != null) {
indexURLReference.Components comp = urlentry.comp();
plasmaParserDocument document;
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true);
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true, false);
if (document != null) {
// create a news message
HashMap<String, String> map = new HashMap<String, String>();
@ -417,7 +417,7 @@ public class yacysearch {
"&amp;search=" + theQuery.queryString() +
"&amp;count="+ theQuery.displayResults() +
"&amp;offset=" + (page * theQuery.displayResults()) +
"&amp;resource=" + theQuery.searchdom() +
"&amp;resource=" + ((theQuery.isLocal()) ? "local" : "global") +
"&amp;urlmaskfilter=" + theQuery.urlMask +
"&amp;prefermaskfilter=" + theQuery.prefer +
"&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +

View File

@ -155,7 +155,7 @@ public class yacysearchitem {
prop.put("references_words_" + hintcount + "_count", theQuery.displayResults());
prop.put("references_words_" + hintcount + "_offset", "0");
prop.put("references_words_" + hintcount + "_contentdom", theQuery.contentdom());
prop.put("references_words_" + hintcount + "_resource", theQuery.searchdom());
prop.put("references_words_" + hintcount + "_resource", ((theQuery.isLocal()) ? "local" : "global"));
}
prop.put("references_words", hintcount);
if (hintcount++ > MAX_TOPWORDS) {

View File

@ -436,7 +436,8 @@ public class plasmaCrawlQueues {
yacyURL url,
int socketTimeout,
boolean keepInMemory,
boolean forText
boolean forText,
boolean global
) {
plasmaCrawlEntry centry = new plasmaCrawlEntry(
@ -445,7 +446,14 @@ public class plasmaCrawlQueues {
null,
"",
new Date(),
(forText) ? sb.defaultTextSnippetProfile.handle() : sb.defaultMediaSnippetProfile.handle(), // crawl profile
(forText) ?
((global) ?
sb.defaultTextSnippetGlobalProfile.handle() :
sb.defaultTextSnippetLocalProfile.handle())
:
((global) ?
sb.defaultMediaSnippetGlobalProfile.handle() :
sb.defaultMediaSnippetLocalProfile.handle()), // crawl profile
0,
0,
0);

View File

@ -667,7 +667,7 @@ public final class plasmaParser {
}
if (!documentCharset.equalsIgnoreCase(charset)) {
this.theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "'");
this.theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
}
// parsing the content

View File

@ -350,7 +350,7 @@ public final class plasmaSearchEvent {
if (query.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) {
// attach text snippet
startTime = System.currentTimeMillis();
plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp, snippetFetchWordHashes, (snippetFetchMode == 2), ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp, snippetFetchWordHashes, (snippetFetchMode == 2), ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000, query.isGlobal());
long snippetComputationTime = System.currentTimeMillis() - startTime;
serverLog.logInfo("SEARCH_EVENT", "text snippet load time for " + comp.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
@ -370,7 +370,7 @@ public final class plasmaSearchEvent {
} else {
// attach media information
startTime = System.currentTimeMillis();
ArrayList<MediaSnippet> mediaSnippets = plasmaSnippetCache.retrieveMediaSnippets(comp.url(), snippetFetchWordHashes, query.contentdom, (snippetFetchMode == 2), 6000);
ArrayList<MediaSnippet> mediaSnippets = plasmaSnippetCache.retrieveMediaSnippets(comp.url(), snippetFetchWordHashes, query.contentdom, (snippetFetchMode == 2), 6000, query.isGlobal());
long snippetComputationTime = System.currentTimeMillis() - startTime;
serverLog.logInfo("SEARCH_EVENT", "media snippet load time for " + comp.url() + ": " + snippetComputationTime);

View File

@ -56,11 +56,11 @@ public final class plasmaSearchImages {
private HashMap<String, htmlFilterImageEntry> images;
public plasmaSearchImages(long maxTime, yacyURL url, int depth) {
public plasmaSearchImages(long maxTime, yacyURL url, int depth, boolean indexing) {
long start = System.currentTimeMillis();
this.images = new HashMap<String, htmlFilterImageEntry>();
if (maxTime > 10) {
Object[] resource = plasmaSnippetCache.getResource(url, true, (int) maxTime, false);
Object[] resource = plasmaSnippetCache.getResource(url, true, (int) maxTime, false, indexing);
InputStream res = (InputStream) resource[0];
Long resLength = (Long) resource[1];
if (res != null) {
@ -85,7 +85,7 @@ public final class plasmaSearchImages {
while (i.hasNext()) {
try {
nexturlstring = i.next().toNormalform(true, true);
addAll(new plasmaSearchImages(serverDate.remainingTime(start, maxTime, 10), new yacyURL(nexturlstring, null), depth - 1));
addAll(new plasmaSearchImages(serverDate.remainingTime(start, maxTime, 10), new yacyURL(nexturlstring, null), depth - 1, indexing));
} catch (MalformedURLException e1) {
e1.printStackTrace();
}

View File

@ -193,8 +193,12 @@ public final class plasmaSearchQuery {
return "text";
}
public String searchdom() {
return (this.domType == SEARCHDOM_LOCAL) ? "local" : "global";
/**
 * Tells whether this query targets anything other than the local search domain.
 *
 * @return <code>true</code> if the domain type is not <code>SEARCHDOM_LOCAL</code>
 */
public boolean isGlobal() {
    final boolean localDomain = (this.domType == SEARCHDOM_LOCAL);
    return !localDomain;
}
/**
 * Tells whether this query is restricted to the local search domain.
 * This is the exact complement of {@link #isGlobal()}.
 *
 * @return <code>true</code> if the domain type is <code>SEARCHDOM_LOCAL</code>
 */
public boolean isLocal() {
    // must be '==': a '!=' here would make this method identical to isGlobal()
    // and invert the "local"/"global" strings that callers derive from it
    return this.domType == SEARCHDOM_LOCAL;
}
public static TreeSet<String> hashes2Set(String query) {

View File

@ -255,7 +255,7 @@ public class plasmaSnippetCache {
}
@SuppressWarnings("unchecked")
public static TextSnippet retrieveTextSnippet(indexURLReference.Components comp, Set<String> queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int timeout, int maxDocLen) {
public static TextSnippet retrieveTextSnippet(indexURLReference.Components comp, Set<String> queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int timeout, int maxDocLen, boolean reindexing) {
// heise = "0OQUNU3JSs05"
yacyURL url = comp.url();
if (queryhashes.size() == 0) {
@ -305,7 +305,7 @@ public class plasmaSnippetCache {
// if not found try to download it
// download resource using the crawler and keep resource in memory if possible
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true);
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true, reindexing);
// place entry on crawl queue
plasmaHTCache.push(entry);
@ -398,9 +398,10 @@ public class plasmaSnippetCache {
* @param fetchOnline specifies if the resource should be loaded from web if it's not available in the cache
* @param timeout
* @param forText
* @param global the domain of the search. If global == true then the content is re-indexed
* @return the parsed document as {@link plasmaParserDocument}
*/
public static plasmaParserDocument retrieveDocument(yacyURL url, boolean fetchOnline, int timeout, boolean forText) {
public static plasmaParserDocument retrieveDocument(yacyURL url, boolean fetchOnline, int timeout, boolean forText, boolean global) {
// load resource
long resContentLength = 0;
@ -416,7 +417,7 @@ public class plasmaSnippetCache {
// if not found try to download it
// download resource using the crawler and keep resource in memory if possible
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, forText);
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, forText, global);
// getting resource metadata (e.g. the http headers for http resources)
if (entry != null) {
@ -648,13 +649,13 @@ public class plasmaSnippetCache {
}
}
public static ArrayList<MediaSnippet> retrieveMediaSnippets(yacyURL url, Set<String> queryhashes, int mediatype, boolean fetchOnline, int timeout) {
public static ArrayList<MediaSnippet> retrieveMediaSnippets(yacyURL url, Set<String> queryhashes, int mediatype, boolean fetchOnline, int timeout, boolean reindexing) {
if (queryhashes.size() == 0) {
serverLog.logFine("snippet fetch", "no query hashes given for url " + url);
return new ArrayList<MediaSnippet>();
}
plasmaParserDocument document = retrieveDocument(url, fetchOnline, timeout, false);
plasmaParserDocument document = retrieveDocument(url, fetchOnline, timeout, false, reindexing);
ArrayList<MediaSnippet> a = new ArrayList<MediaSnippet>();
if (document != null) {
if ((mediatype == plasmaSearchQuery.CONTENTDOM_ALL) || (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO)) a.addAll(computeMediaSnippets(document, queryhashes, plasmaSearchQuery.CONTENTDOM_AUDIO));
@ -860,7 +861,7 @@ public class plasmaSnippetCache {
* <tr><td>[1]</td><td>the content-length as {@link Long}</td></tr>
* </table>
*/
public static Object[] getResource(yacyURL url, boolean fetchOnline, int socketTimeout, boolean forText) {
public static Object[] getResource(yacyURL url, boolean fetchOnline, int socketTimeout, boolean forText, boolean reindexing) {
// load the url as resource from the web
long contentLength = -1;
@ -872,7 +873,7 @@ public class plasmaSnippetCache {
// if the content is not available in cache try to download it from web
// try to download the resource using a crawler
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout, true, forText);
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout, true, forText, reindexing);
if (entry == null) return null; // not found in web
// read resource body (if it is there)

View File

@ -212,8 +212,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
public plasmaCrawlProfile profilesActiveCrawls, profilesPassiveCrawls;
public plasmaCrawlProfile.entry defaultProxyProfile;
public plasmaCrawlProfile.entry defaultRemoteProfile;
public plasmaCrawlProfile.entry defaultTextSnippetProfile;
public plasmaCrawlProfile.entry defaultMediaSnippetProfile;
public plasmaCrawlProfile.entry defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
public plasmaCrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
public boolean rankingOn;
public plasmaRankingDistribution rankingOwnDistribution;
public plasmaRankingDistribution rankingOtherDistribution;
@ -648,10 +648,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
// Miscellaneous settings
//////////////////////////////////////////////////////////////////////////////////////////////
public static final String CRAWL_PROFILE_PROXY = "proxy";
public static final String CRAWL_PROFILE_REMOTE = "remote";
public static final String CRAWL_PROFILE_SNIPPET_TEXT = "snippetText";
public static final String CRAWL_PROFILE_SNIPPET_MEDIA = "snippetMedia";
public static final String CRAWL_PROFILE_PROXY = "proxy";
public static final String CRAWL_PROFILE_REMOTE = "remote";
public static final String CRAWL_PROFILE_SNIPPET_LOCAL_TEXT = "snippetLocalText";
public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT = "snippetGlobalText";
public static final String CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA = "snippetLocalMedia";
public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA = "snippetGlobalMedia";
/**
* <p><code>public static final String <strong>CRAWLER_THREADS_ACTIVE_MAX</strong> = "crawler.MaxActiveThreads"</code></p>
@ -1511,8 +1513,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
private void initActiveCrawlProfiles() {
this.defaultProxyProfile = null;
this.defaultRemoteProfile = null;
this.defaultTextSnippetProfile = null;
this.defaultMediaSnippetProfile = null;
this.defaultTextSnippetLocalProfile = null;
this.defaultTextSnippetGlobalProfile = null;
this.defaultMediaSnippetLocalProfile = null;
this.defaultMediaSnippetGlobalProfile = null;
Iterator<plasmaCrawlProfile.entry> i = this.profilesActiveCrawls.profiles(true);
plasmaCrawlProfile.entry profile;
String name;
@ -1521,8 +1525,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
name = profile.name();
if (name.equals(CRAWL_PROFILE_PROXY)) this.defaultProxyProfile = profile;
if (name.equals(CRAWL_PROFILE_REMOTE)) this.defaultRemoteProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_TEXT)) this.defaultTextSnippetProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_MEDIA)) this.defaultMediaSnippetProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) this.defaultTextSnippetLocalProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) this.defaultTextSnippetGlobalProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) this.defaultMediaSnippetLocalProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) this.defaultMediaSnippetGlobalProfile = profile;
}
if (this.defaultProxyProfile == null) {
// generate new default entry for proxy crawling
@ -1540,14 +1546,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, ".*", ".*", 0, 0,
-1, -1, -1, true, true, true, false, true, false, true, true, false);
}
if (this.defaultTextSnippetProfile == null) {
if (this.defaultTextSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultTextSnippetProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_TEXT, null, ".*", ".*", 0, 0,
defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, false, false, false, false, false, true, true, false);
}
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, true, true, true, true, false, true, true, false);
}
if (this.defaultMediaSnippetProfile == null) {
if (this.defaultMediaSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultMediaSnippetProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_MEDIA, null, ".*", ".*", 0, 0,
defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, false, false, false, false, false, true, true, false);
}
if (this.defaultMediaSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, false, true, true, true, false, true, true, false);
}
}
@ -1598,8 +1614,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
entry = iter.next();
if (!((entry.name().equals(CRAWL_PROFILE_PROXY)) ||
(entry.name().equals(CRAWL_PROFILE_REMOTE)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_TEXT)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_MEDIA)))) {
(entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)))) {
profilesPassiveCrawls.newEntry(entry.map());
iter.remove();
hasDoneSomething = true;
@ -2253,7 +2271,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
// check for interruption
checkInterruption();
log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason);
log.logFine("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason);
addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), entry.initiator(), dc_title, noIndexReason, new kelondroBitfield());
/*
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) {
@ -2400,7 +2418,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
InputStream resourceContent = null;
try {
// get the resource content
Object[] resource = plasmaSnippetCache.getResource(comp.url(), fetchOnline, 10000, true);
Object[] resource = plasmaSnippetCache.getResource(comp.url(), fetchOnline, 10000, true, false);
resourceContent = (InputStream) resource[0];
Long resourceContentLength = (Long) resource[1];

View File

@ -34,7 +34,6 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

View File

@ -78,7 +78,7 @@ public class ymageOSM {
InputStream tileStream = plasmaHTCache.getResourceContentStream(tileURL);
if (tileStream == null) {
// download resource using the crawler and keep resource in memory if possible
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(tileURL, 20000, true, false);
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(tileURL, 20000, true, false, false);
if ((entry == null) || (entry.cacheArray() == null)) return null;
tileStream = new ByteArrayInputStream(entry.cacheArray());
}