Mirror of https://github.com/yacy/yacy_search_server.git
Synced 2024-09-19 00:01:41 +02:00
ViewFile servlet: update the index if the viewed resource is newer

The viewed text and the stored metadata then stay consistent. To archive a
document, request it with a profile that allows indexing (defaultglobaltext);
the index entry is updated as a side effect. The resource is loaded and parsed
for viewing anyway, so the update is not an expensive operation.

Request: remove two unused init parameters:
- anchors: the number of anchors of the parent
- forkfactor: the sum of anchors of all ancestors
This commit is contained in:
parent 226aea5914
commit ff18129def
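The Request change is mechanical at every call site in the hunks below: the two trailing 0 arguments (anchors, forkfactor) are dropped from the constructor call. A minimal sketch of the migration, using a simplified stand-in for net.yacy.crawler.retrieval.Request (String replaces DigestURL and the YaCy-specific fields are omitted, so the example compiles on its own):

    import java.util.Date;

    // Simplified stand-in for net.yacy.crawler.retrieval.Request; only the
    // constructor shape matters here, not the real class body.
    class RequestSketch {
        RequestSketch(byte[] initiator, String url, byte[] referrerhash,
                String name, Date appdate, String profileHandle, int depth) {
            // before this commit the parameter list continued with
            // "int anchors, int forkfactor"; both were unused, so they are gone
        }
    }

    public class CallSiteMigration {
        public static void main(String[] args) {
            // old call site: new Request(..., handle, 0, 0, 0);
            // new call site:
            RequestSketch r = new RequestSketch(null, "http://example.org/",
                    null, "CRAWLING-ROOT", new Date(), null, 0);
            System.out.println("constructed " + r);
        }
    }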
@@ -161,7 +161,7 @@ public class HostBrowser {
                         sb.peers.mySeed().hash.getBytes(),
                         url, null, load, new Date(),
                         sb.crawler.defaultProxyProfile.handle(),
-                        0, 0, 0
+                        0
                         ));
                 prop.putHTML("result", reasonString == null ? ("added url to indexer: " + load) : ("not indexed url '" + load + "': " + reasonString));
                 if (wait) waitloop: for (int i = 0; i < 30; i++) {
@@ -174,8 +174,6 @@ public class QuickCrawlLink_p {
                     (title==null)?"CRAWLING-ROOT":title,
                     new Date(),
                     pe.handle(),
-                    0,
-                    0,
                     0
                     ));
@@ -178,7 +178,8 @@ public class ViewFile {
         Response response = null;
         try {
             ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", ClientIdentification.yacyInternetCrawlerAgentName));
-            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, agent);
+            // use sb.loader.request( , , global=true) to use a crawl profile that allows the index update
+            response = sb.loader.load(sb.loader.request(url, true, true), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, agent);
         } catch (final IOException e) {
             prop.put("error", "4");
             prop.put("error_errorText", "error loading resource: " + e.getMessage());
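The functional change in this hunk is the third argument of sb.loader.request(url, true, global): with global=true the loader selects a crawl profile that permits indexing (the commit message names defaultglobaltext), so the viewed resource can later be handed to the indexer. The LoaderDispatcher hunk further down shows the selection pattern itself; here is a self-contained sketch of the idea, with illustrative handle strings rather than real YaCy profile handles:

    public class ProfileSelection {
        public static void main(String[] args) {
            // third argument of sb.loader.request(url, true, global)
            boolean global = true;

            // a global request selects the profile whose rules allow an
            // index update (cf. the LoaderDispatcher hunk below)
            String profileHandle = global
                    ? "globalProfileHandle"  // illustrative placeholder
                    : "localProfileHandle";  // illustrative placeholder

            System.out.println("loading with crawl profile: " + profileHandle);
        }
    }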
@@ -374,6 +375,10 @@ public class ViewFile {
                 prop.put("showSnippet_teasertext", desc);
                 prop.put("showSnippet", 1);
             }
+            // update the index with the parsed resource if the index entry is older
+            if (urlEntry.loaddate().before(response.lastModified())) {
+                Switchboard.getSwitchboard().toIndexer(response);
+            }
             if (document != null) document.close();
         }
         prop.put("error", "0");
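The guard added here is a plain java.util.Date comparison: re-index only when the stored entry's load date lies before the Last-Modified date of the freshly loaded response. A self-contained sketch of the same check, with dates standing in for urlEntry.loaddate() and response.lastModified():

    import java.util.Date;

    public class FreshnessCheck {
        public static void main(String[] args) {
            Date indexLoadDate = new Date(System.currentTimeMillis() - 86_400_000L); // indexed a day ago
            Date lastModified = new Date(); // resource modified just now

            // the same test ViewFile performs before calling toIndexer(response)
            if (indexLoadDate.before(lastModified)) {
                System.out.println("index entry is older -> re-index the resource");
            }
        }
    }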
@@ -102,8 +102,6 @@ public class push_p {
                         "", // the name of the document to crawl
                         new Date(), // current date
                         profile.handle(), // the name of the prefetch profile. This must not be null!
-                        0, // depth the crawling depth of the entry
-                        0, // anchors number of anchors of the parent
-                        0); // forkfactor sum of anchors of all ancestors
+                        0); // depth the crawling depth of the entry
                 Response response = new Response(
                         request,
@@ -78,8 +78,6 @@ public class rct_p {
                                 "REMOTE-CRAWLING",
                                 loaddate,
                                 sb.crawler.defaultRemoteProfile.handle(),
-                                0,
-                                0,
                                 0));
                     } else {
                         env.getLog().warn("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
@@ -209,8 +209,6 @@ public final class CrawlStacker {
                     url.getNameProperty(),
                     new Date(),
                     profileHandle,
-                    0,
-                    0,
                     0
                     ));
             }
@@ -250,8 +248,6 @@ public final class CrawlStacker {
                             MultiProtocolURL.unescape(entry.name),
                             entry.date,
                             profileHandle,
-                            0,
-                            0,
                             0));
                 }
             } catch (final IOException e1) {
@@ -276,8 +272,6 @@ public final class CrawlStacker {
                     "CRAWLING-ROOT",
                     new Date(),
                     pe.handle(),
-                    0,
-                    0,
                     0));
         }
@@ -530,8 +530,6 @@ public class CrawlQueues {
                                 item.getDescriptions().size() > 0 ? item.getDescriptions().get(0) : "",
                                 loaddate,
                                 this.sb.crawler.defaultRemoteProfile.handle(),
-                                0,
-                                0,
                                 0
                                 ));
                     } else {
@@ -119,7 +119,7 @@ public class Request extends WorkflowJob
      * @param referrerhash
      */
     public Request(final DigestURL url, final byte[] referrerhash) {
-        this(null, url, referrerhash, null, null, null, 0, 0, 0);
+        this(null, url, referrerhash, null, null, null, 0);
     }

     /**
@@ -132,8 +132,6 @@ public class Request extends WorkflowJob
      * @param appdate the time when the url was first time appeared
      * @param profileHandle the name of the prefetch profile. This must not be null!
      * @param depth the crawling depth of the entry
-     * @param anchors number of anchors of the parent
-     * @param forkfactor sum of anchors of all ancestors
      */
     public Request(
             final byte[] initiator,
@@ -142,9 +140,7 @@ public class Request extends WorkflowJob
             final String name,
             final Date appdate,
             final String profileHandle,
-            final int depth,
-            final int anchors,
-            final int forkfactor) {
+            final int depth) {
         // create new entry and store it into database
         assert url != null;
         assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle
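Taken together, the three Request.java hunks above leave a convenience constructor that delegates with a single trailing depth argument instead of depth, anchors, forkfactor. A compilable stand-in of that shape (String again replaces DigestURL, and the Word-hash assertion on profileHandle is omitted):

    import java.util.Date;

    public class RequestShape {
        // convenience constructor, as changed in the @@ -119,7 hunk
        public RequestShape(String url, byte[] referrerhash) {
            this(null, url, referrerhash, null, null, null, 0);
        }

        // full constructor, as changed in the @@ -142,9 hunk
        public RequestShape(byte[] initiator, String url, byte[] referrerhash,
                String name, Date appdate, String profileHandle, int depth) {
            // create new entry and store it into database
            assert url != null;
        }

        public static void main(String[] args) {
            new RequestShape("http://example.org/", null);
        }
    }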
@@ -108,8 +108,6 @@ public class SitemapImporter extends Thread {
                 entry.url(),
                 entry.lastmod(new Date()),
                 this.crawlingProfile.handle(),
-                0,
-                0,
                 0
                 ));
         logger.info("New URL '" + entry.url() + "' added for loading.");
@@ -197,7 +197,7 @@ public class YMarkCrawlStart extends HashMap<String,String>{
                         null,
                         "CRAWLING-ROOT",
                         new Date(),
-                        pe.handle(), 0, 0, 0
+                        pe.handle(), 0
                         ));
             }
         }
@@ -74,8 +74,6 @@ public class ProxyCacheHandler extends AbstractRemoteHandler implements Handler
                 "",
                 cachedResponseHeader.lastModified(),
                 sb.crawler.defaultProxyProfile.handle(),
-                0,
-                0,
                 0);

         final Response cachedResponse = new Response(
@@ -180,8 +180,6 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
                 "",
                 responseHeaderLegacy.lastModified(),
                 sb.crawler.defaultProxyProfile.handle(),
-                0,
-                0,
                 0); //sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete);
         final Response yacyResponse = new Response(
                 yacyRequest,
@@ -128,8 +128,6 @@ public final class LoaderDispatcher {
                 ((global) ?
                         this.sb.crawler.defaultMediaSnippetGlobalProfile.handle() :
                         this.sb.crawler.defaultMediaSnippetLocalProfile.handle()), // crawl profile
-                0,
-                0,
                 0);
     }
@@ -1941,8 +1941,6 @@ public final class Switchboard extends serverSwitch {
                             "",
                             surrogate.getDate(),
                             this.crawler.defaultSurrogateProfile.handle(),
-                            0,
-                            0,
                             0);
                     response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
                     final IndexingQueueEntry queueEntry =
@@ -2673,9 +2671,7 @@ public final class Switchboard extends serverSwitch {
                                 nextEntry.getValue(),
                                 new Date(),
                                 response.profile().handle(),
-                                nextdepth,
-                                0,
-                                0));
+                                nextdepth));
                     } catch (final MalformedURLException e ) {
                         ConcurrentLog.logException(e);
                     }
@@ -3078,8 +3074,6 @@ public final class Switchboard extends serverSwitch {
                     "CRAWLING-ROOT",
                     new Date(),
                     profile.handle(),
-                    0,
-                    0,
                     0
                     ));
@@ -357,8 +357,6 @@ public final class HTTPDProxyHandler {
                     "",
                     cachedResponseHeader.lastModified(),
                     sb.crawler.defaultProxyProfile.handle(),
-                    0,
-                    0,
                     0);
             final Response response = new Response(
                     request,
@@ -474,8 +472,6 @@ public final class HTTPDProxyHandler {
                     "",
                     responseHeader.lastModified(),
                     sb.crawler.defaultProxyProfile.handle(),
-                    0,
-                    0,
                     0);