mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-22 00:00:59 +02:00
dba7ef5144
- removed never-used secondary crawl depth - added a must-not-match filter that can be used to exclude urls from a crawl - added stub for crawl tags which will be used to identify search results that had been produced from specific crawls please update the yacybar: replace property name 'crawlFilter' with 'mustmatch'. Additionally, a new parameter named 'mustnotmatch' can be used, which should be by default the empty string (match-never) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5342 6c8d7289-2bf4-0310-a012-ef5d649a1542
27 lines
1.0 KiB
XML
27 lines
1.0 KiB
XML
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Crawl profile list template.
  Emits one crawlProfile element for every entry of the crawlProfiles loop.
  The status element is rendered as either "terminated" or "active"; the
  option elements withQuery, storeCache, indexText, indexMedia and
  remoteIndexing are rendered as either "no" or "yes".
  The mustmatch and mustnotmatch elements carry the URL filters of the profile.
  NOTE(review): placeholder values are presumably XML-escaped by the servlet
  that fills this template; confirm on the server side.
-->
<crawlProfiles>
  #{crawlProfiles}#
  <crawlProfile>
    <name>#[name]#</name>
    <status>#(status)#terminated::active#(/status)#</status>
    <starturl>#[startURL]#</starturl>
    <depth>#[depth]#</depth>
    <mustmatch>#[mustmatch]#</mustmatch>
    <mustnotmatch>#[mustnotmatch]#</mustnotmatch>
    <crawlingIfOlder>#[crawlingIfOlder]#</crawlingIfOlder>
    <crawlingDomFilterDepth>#[crawlingDomFilterDepth]#</crawlingDomFilterDepth>
    <crawlingDomFilterContent>
      #{crawlingDomFilterContent}#
      <item>#[item]#</item>
      #{/crawlingDomFilterContent}#
    </crawlingDomFilterContent>
    <crawlingDomMaxPages>#[crawlingDomMaxPages]#</crawlingDomMaxPages>
    <withQuery>#(withQuery)#no::yes#(/withQuery)#</withQuery>
    <storeCache>#(storeCache)#no::yes#(/storeCache)#</storeCache>
    <indexText>#(indexText)#no::yes#(/indexText)#</indexText>
    <indexMedia>#(indexMedia)#no::yes#(/indexMedia)#</indexMedia>
    <remoteIndexing>#(remoteIndexing)#no::yes#(/remoteIndexing)#</remoteIndexing>
  </crawlProfile>
  #{/crawlProfiles}#
</crawlProfiles>