yacy_search_server/htroot/CrawlProfileEditor_p.xml
orbiter dba7ef5144 extended crawling constraints:
- removed never-used secondary crawl depth
- added a must-not-match filter that can be used to exclude urls from a crawl
- added stub for crawl tags which will be used to identify search results that had been produced from specific crawls
please update the yacybar: replace property name 'crawlFilter' with 'mustmatch'.
Additionally, a new parameter named 'mustnotmatch' can be used, which should by default be the empty string (match-never)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5342 6c8d7289-2bf4-0310-a012-ef5d649a1542
2008-11-14 09:58:56 +00:00

27 lines
1.0 KiB
XML

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Crawl profile listing.

  The hash-delimited markers in this file are not XML. They look like
  server-side template markers that are replaced before the document is
  delivered. NOTE(review): presumably expanded by the YaCy template
  engine; confirm against the servlet that fills this template.

  Layout:
    * The root element crawlProfiles wraps a repeating section (also named
      crawlProfiles) whose body is a single crawlProfile element.
    * Each crawlProfile carries: name, status, starturl, depth, the
      mustmatch and mustnotmatch filters, crawlingIfOlder,
      crawlingDomFilterDepth, crawlingDomFilterContent,
      crawlingDomMaxPages and the flags withQuery, storeCache, indexText,
      indexMedia and remoteIndexing.
    * crawlingDomFilterContent holds a nested repeating section that
      produces one item element per pass.
    * status and the five flags use a two-alternative marker whose literal
      texts are separated by a double colon: terminated / active for
      status, no / yes for the flags.
      NOTE(review): which value selects which alternative is decided by
      the template engine; verify before relying on it.

  Maintenance notes:
    * Do not write the marker syntax literally inside a comment; the
      engine presumably expands it there as well. TODO confirm.
    * Keep comments outside the repeating sections, otherwise they would
      be repeated along with the section body.
    * Element names are read by external clients, so treat them as a
      fixed wire format.
-->
<crawlProfiles>
#{crawlProfiles}#
<crawlProfile>
<name>#[name]#</name>
<status>#(status)#terminated::active#(/status)#</status>
<starturl>#[startURL]#</starturl>
<depth>#[depth]#</depth>
<mustmatch>#[mustmatch]#</mustmatch>
<mustnotmatch>#[mustnotmatch]#</mustnotmatch>
<crawlingIfOlder>#[crawlingIfOlder]#</crawlingIfOlder>
<crawlingDomFilterDepth>#[crawlingDomFilterDepth]#</crawlingDomFilterDepth>
<crawlingDomFilterContent>
#{crawlingDomFilterContent}#
<item>#[item]#</item>
#{/crawlingDomFilterContent}#
</crawlingDomFilterContent>
<crawlingDomMaxPages>#[crawlingDomMaxPages]#</crawlingDomMaxPages>
<withQuery>#(withQuery)#no::yes#(/withQuery)#</withQuery>
<storeCache>#(storeCache)#no::yes#(/storeCache)#</storeCache>
<indexText>#(indexText)#no::yes#(/indexText)#</indexText>
<indexMedia>#(indexMedia)#no::yes#(/indexMedia)#</indexMedia>
<remoteIndexing>#(remoteIndexing)#no::yes#(/remoteIndexing)#</remoteIndexing>
</crawlProfile>
#{/crawlProfiles}#
</crawlProfiles>