*) plasmaCrawlProfile.java no longer contains any HTML

*) <br> is no longer displayed in the items of Auto Filter Content on WatchCrawler_p.html
*) removed unnecessary replaceHTML()


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3425 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
low012 2007-03-02 21:09:28 +00:00
parent 93e1ad2bca
commit ce360ef43e
3 changed files with 57 additions and 40 deletions

View File

@ -179,7 +179,7 @@
<td>#[filter]#</td> <td>#[filter]#</td>
<td>#[crawlingIfOlder]#</td> <td>#[crawlingIfOlder]#</td>
<td>#[crawlingDomFilterDepth]#</td> <td>#[crawlingDomFilterDepth]#</td>
<td>#[crawlingDomFilterContent]#</td> <td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
<td>#[crawlingDomMaxPages]#</td> <td>#[crawlingDomMaxPages]#</td>
<td>#(withQuery)#no::yes#(/withQuery)#</td> <td>#(withQuery)#no::yes#(/withQuery)#</td>
<td>#(storeCache)#no::yes#(/storeCache)#</td> <td>#(storeCache)#no::yes#(/storeCache)#</td>

View File

@ -1,29 +1,3 @@
import java.io.File;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsRecord;
// WatchCrawler_p.java // WatchCrawler_p.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany // (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 18.12.2006 on http://www.anomic.de // first published 18.12.2006 on http://www.anomic.de
@ -51,6 +25,32 @@ import de.anomic.yacy.yacyNewsRecord;
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.File;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsRecord;
public class WatchCrawler_p { public class WatchCrawler_p {
// this servlet does NOT create the WatchCrawler page content! // this servlet does NOT create the WatchCrawler page content!
@ -332,14 +332,28 @@ public class WatchCrawler_p {
while (it.hasNext()) { while (it.hasNext()) {
profile = (plasmaCrawlProfile.entry) it.next(); profile = (plasmaCrawlProfile.entry) it.next();
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0)); prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_name", wikiCode.replaceHTML(profile.name())); prop.put("crawlProfiles_"+count+"_name", profile.name());
prop.put("crawlProfiles_"+count+"_startURL", wikiCode.replaceHTML(profile.startURL())); prop.put("crawlProfiles_"+count+"_startURL", profile.startURL());
prop.put("crawlProfiles_"+count+"_handle", wikiCode.replaceHTML(profile.handle())); prop.put("crawlProfiles_"+count+"_handle", profile.handle());
prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth()); prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth());
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter()); prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder()); prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder());
prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth())); prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth()));
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", profile.domNames(true, domlistlength));
//start contrib [MN]
int i = 0;
String item;
while((i <= domlistlength) && !((item = profile.domName(true, i)).equals(""))){
if(i == domlistlength){
item = item + " ...";
}
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent_"+i+"_item", item);
i++;
}
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", i);
//end contrib [MN]
prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages()); prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages());
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0)); prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0)); prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0));

View File

@ -47,6 +47,7 @@ import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroDyn;
@ -461,21 +462,23 @@ public class plasmaCrawlProfile {
if (domFilterDepth() == Integer.MAX_VALUE) return true; if (domFilterDepth() == Integer.MAX_VALUE) return true;
return doms.containsKey(domain); return doms.containsKey(domain);
} }
public String domNames(boolean attr, int maxlength) {
public String domName(boolean attr, int index){
Iterator domnamesi = doms.entrySet().iterator(); Iterator domnamesi = doms.entrySet().iterator();
String domnames=""; String domname="";
Map.Entry ey; Map.Entry ey;
DomProfile dp; DomProfile dp;
while (domnamesi.hasNext()) { int i = 0;
while ((domnamesi.hasNext()) && (i < index)) {
ey = (Map.Entry) domnamesi.next();
i++;
}
if(domnamesi.hasNext()){
ey = (Map.Entry) domnamesi.next(); ey = (Map.Entry) domnamesi.next();
dp = (DomProfile) ey.getValue(); dp = (DomProfile) ey.getValue();
domnames += ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count + " ") : " ") + "<br>"; domname = ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count) : " ");
if ((maxlength > 0) && (domnames.length() >= maxlength)) {
domnames = domnames.substring(0, maxlength-3) + "...";
break;
} }
} return domname;
return domnames;
} }
} }
} }