mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) no more HTML in plasmaCrawlProfile.java
*) <br> will no longer be displayed in items in Auto Filter Content on WatchCrawler_p.html
*) removed unnecessary replaceHTML()
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3425 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
93e1ad2bca
commit
ce360ef43e
|
@ -179,7 +179,7 @@
|
|||
<td>#[filter]#</td>
|
||||
<td>#[crawlingIfOlder]#</td>
|
||||
<td>#[crawlingDomFilterDepth]#</td>
|
||||
<td>#[crawlingDomFilterContent]#</td>
|
||||
<td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
|
||||
<td>#[crawlingDomMaxPages]#</td>
|
||||
<td>#(withQuery)#no::yes#(/withQuery)#</td>
|
||||
<td>#(storeCache)#no::yes#(/storeCache)#</td>
|
||||
|
|
|
@ -1,29 +1,3 @@
|
|||
import java.io.File;
|
||||
import java.io.Writer;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import de.anomic.data.wikiCode;
|
||||
import de.anomic.htmlFilter.htmlFilterContentScraper;
|
||||
import de.anomic.htmlFilter.htmlFilterWriter;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.kelondro.kelondroBitfield;
|
||||
import de.anomic.net.URL;
|
||||
import de.anomic.plasma.plasmaCrawlEURL;
|
||||
import de.anomic.plasma.plasmaCrawlProfile;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.plasma.plasmaURL;
|
||||
import de.anomic.server.serverFileUtils;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyNewsRecord;
|
||||
|
||||
// WatchCrawler_p.java
|
||||
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
|
||||
// first published 18.12.2006 on http://www.anomic.de
|
||||
|
@ -51,6 +25,32 @@ import de.anomic.yacy.yacyNewsRecord;
|
|||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import java.io.File;
|
||||
import java.io.Writer;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import de.anomic.data.wikiCode;
|
||||
import de.anomic.htmlFilter.htmlFilterContentScraper;
|
||||
import de.anomic.htmlFilter.htmlFilterWriter;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.kelondro.kelondroBitfield;
|
||||
import de.anomic.net.URL;
|
||||
import de.anomic.plasma.plasmaCrawlEURL;
|
||||
import de.anomic.plasma.plasmaCrawlProfile;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.plasma.plasmaURL;
|
||||
import de.anomic.server.serverFileUtils;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyNewsRecord;
|
||||
|
||||
public class WatchCrawler_p {
|
||||
|
||||
// this servlet does NOT create the WatchCrawler page content!
|
||||
|
@ -332,14 +332,28 @@ public class WatchCrawler_p {
|
|||
while (it.hasNext()) {
|
||||
profile = (plasmaCrawlProfile.entry) it.next();
|
||||
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0));
|
||||
prop.put("crawlProfiles_"+count+"_name", wikiCode.replaceHTML(profile.name()));
|
||||
prop.put("crawlProfiles_"+count+"_startURL", wikiCode.replaceHTML(profile.startURL()));
|
||||
prop.put("crawlProfiles_"+count+"_handle", wikiCode.replaceHTML(profile.handle()));
|
||||
prop.put("crawlProfiles_"+count+"_name", profile.name());
|
||||
prop.put("crawlProfiles_"+count+"_startURL", profile.startURL());
|
||||
prop.put("crawlProfiles_"+count+"_handle", profile.handle());
|
||||
prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth());
|
||||
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
|
||||
prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder());
|
||||
prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth()));
|
||||
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", profile.domNames(true, domlistlength));
|
||||
|
||||
//start contrib [MN]
|
||||
int i = 0;
|
||||
String item;
|
||||
while((i <= domlistlength) && !((item = profile.domName(true, i)).equals(""))){
|
||||
if(i == domlistlength){
|
||||
item = item + " ...";
|
||||
}
|
||||
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent_"+i+"_item", item);
|
||||
i++;
|
||||
}
|
||||
|
||||
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", i);
|
||||
//end contrib [MN]
|
||||
|
||||
prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages());
|
||||
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
|
||||
prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0));
|
||||
|
|
|
@ -47,6 +47,7 @@ import java.util.HashMap;
|
|||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import de.anomic.kelondro.kelondroBase64Order;
|
||||
import de.anomic.kelondro.kelondroDyn;
|
||||
|
@ -461,21 +462,23 @@ public class plasmaCrawlProfile {
|
|||
if (domFilterDepth() == Integer.MAX_VALUE) return true;
|
||||
return doms.containsKey(domain);
|
||||
}
|
||||
public String domNames(boolean attr, int maxlength) {
|
||||
|
||||
public String domName(boolean attr, int index){
|
||||
Iterator domnamesi = doms.entrySet().iterator();
|
||||
String domnames="";
|
||||
String domname="";
|
||||
Map.Entry ey;
|
||||
DomProfile dp;
|
||||
while (domnamesi.hasNext()) {
|
||||
int i = 0;
|
||||
while ((domnamesi.hasNext()) && (i < index)) {
|
||||
ey = (Map.Entry) domnamesi.next();
|
||||
i++;
|
||||
}
|
||||
if(domnamesi.hasNext()){
|
||||
ey = (Map.Entry) domnamesi.next();
|
||||
dp = (DomProfile) ey.getValue();
|
||||
domnames += ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count + " ") : " ") + "<br>";
|
||||
if ((maxlength > 0) && (domnames.length() >= maxlength)) {
|
||||
domnames = domnames.substring(0, maxlength-3) + "...";
|
||||
break;
|
||||
domname = ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count) : " ");
|
||||
}
|
||||
}
|
||||
return domnames;
|
||||
return domname;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user