mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Added an option to put image links into the crawl queue and handle these
like normal documents. With this option enabled (it is on by default at the moment, but this might change soon), the EXIF data of images can be written into the search index and used in image search.
This commit is contained in:
parent
e8e558a9b7
commit
69f85265e1
|
@ -796,6 +796,11 @@ search.excludehosth=
|
|||
# the cases of nocache, iffresh and ifexist causes an index deletion
|
||||
search.verify.delete = true
|
||||
|
||||
# images may be treated either as documents that are shown in search results or as objects
|
||||
# that are only visible in special search environments, like image search
|
||||
search.excludeintext.image = true
|
||||
crawler.load.image = true
|
||||
|
||||
# remote search details
|
||||
remotesearch.maxcount = 10
|
||||
remotesearch.maxtime = 3000
|
||||
|
|
|
@ -336,9 +336,10 @@ public final class CrawlStacker {
|
|||
|
||||
// check availability of parser and maxfilesize
|
||||
String warning = null;
|
||||
boolean loadImages = Switchboard.getSwitchboard().getConfigBool("crawler.load.image", true);
|
||||
if ((maxFileSize >= 0 && entry.size() > maxFileSize) ||
|
||||
entry.url().getContentDomain() == ContentDomain.APP ||
|
||||
entry.url().getContentDomain() == ContentDomain.IMAGE ||
|
||||
(!loadImages && entry.url().getContentDomain() == ContentDomain.IMAGE) ||
|
||||
entry.url().getContentDomain() == ContentDomain.AUDIO ||
|
||||
entry.url().getContentDomain() == ContentDomain.VIDEO ||
|
||||
entry.url().getContentDomain() == ContentDomain.CTRL) {
|
||||
|
|
|
@ -2512,10 +2512,12 @@ public final class Switchboard extends serverSwitch {
|
|||
) {
|
||||
// get the hyperlinks
|
||||
final Map<DigestURI, String> hl = Document.getHyperlinks(documents);
|
||||
boolean loadImages = getConfigBool("crawler.load.image", true);
|
||||
if (loadImages) hl.putAll(Document.getImagelinks(documents));
|
||||
|
||||
// add all media links also to the crawl stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links
|
||||
if (response.profile().directDocByURL()) {
|
||||
hl.putAll(Document.getImagelinks(documents));
|
||||
if (!loadImages) hl.putAll(Document.getImagelinks(documents));
|
||||
hl.putAll(Document.getApplinks(documents));
|
||||
hl.putAll(Document.getVideolinks(documents));
|
||||
hl.putAll(Document.getAudiolinks(documents));
|
||||
|
|
Loading…
Reference in New Issue
Block a user