2007-06-25 14:41:54 +00:00

227 lines
11 KiB

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlProfile.entry;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
public class CrawlProfileEditor_p {
public static class eentry {
public static final int BOOLEAN = 0;
public static final int INTEGER = 1;
public static final int STRING = 2;
public final String name;
public final String label;
public final boolean readonly;
public final int type;
public eentry(String name, String label, boolean readonly, int type) {
this.name = name;
this.label = label;
this.readonly = readonly;
this.type = type;
/**
 * The profile attributes offered by the editor, in the order in which they
 * are iterated when a submitted form is applied and when the edit form is
 * filled. Every element is an {@code eentry}; the list is populated once in
 * the static initializer below and only iterated afterwards.
 */
private static final ArrayList /*<eentry>*/ labels = new ArrayList();
static {
    // identifying attributes: flagged read-only
    labels.add(new eentry(entry.NAME,             "Name",                 true,  eentry.STRING));
    labels.add(new eentry(entry.START_URL,        "Start URL",            true,  eentry.STRING));

    // textual filters
    labels.add(new eentry(entry.GENERAL_FILTER,   "General Filter",       false, eentry.STRING));
    labels.add(new eentry(entry.SPECIFIC_FILTER,  "Specific Filter",      false, eentry.STRING));

    // numeric settings
    labels.add(new eentry(entry.GENERAL_DEPTH,    "General Depth",        false, eentry.INTEGER));
    labels.add(new eentry(entry.SPECIFIC_DEPTH,   "Specific Depth",       false, eentry.INTEGER));
    labels.add(new eentry(entry.RECRAWL_IF_OLDER, "Recrawl If Older",     false, eentry.INTEGER));
    labels.add(new eentry(entry.DOM_FILTER_DEPTH, "Domain Filter Depth",  false, eentry.INTEGER));
    labels.add(new eentry(entry.DOM_MAX_PAGES,    "Domain Max. Pages",    false, eentry.INTEGER));

    // on/off switches
    labels.add(new eentry(entry.CRAWLING_Q,       "CrawlingQ / '?'-URLs", false, eentry.BOOLEAN));
    labels.add(new eentry(entry.INDEX_TEXT,       "Index Text",           false, eentry.BOOLEAN));
    labels.add(new eentry(entry.INDEX_MEDIA,      "Index Media",          false, eentry.BOOLEAN));
    labels.add(new eentry(entry.STORE_HTCACHE,    "Store in HTCache",     false, eentry.BOOLEAN));
    labels.add(new eentry(entry.STORE_TXCACHE,    "Store in TXCache",     false, eentry.BOOLEAN));
    labels.add(new eentry(entry.REMOTE_INDEXING,  "Remote Indexing",      false, eentry.BOOLEAN));
    labels.add(new eentry(entry.XSSTOPW,          "Static stop-words",    false, eentry.BOOLEAN));
    labels.add(new eentry(entry.XDSTOPW,          "Dynamic stop-words",   false, eentry.BOOLEAN));
    labels.add(new eentry(entry.XPSTOPW,          "Parent stop-words",    false, eentry.BOOLEAN));
}
/**
 * Builds the template properties for the crawl profile editor page.
 *
 * Visible steps, in order: read the selected profile handle from the form,
 * fill the "profiles_*" selection list, apply a submitted edit to the
 * selected profile, fill the "crawlProfiles_*" overview table, and fill the
 * "edit_*" form for the selected profile.
 *
 * NOTE(review): this copy of the method contains no closing braces at all and
 * several statements appear to be missing (see the notes inline). Block
 * boundaries therefore cannot be read off the text; verify against the
 * repository version before changing any logic here.
 *
 * @param header the request header; no statement visible here reads it
 * @param post   the submitted form values; may be null, in which case the
 *               handle defaults to "" and the domain list length to 160
 * @param env    the server switch; cast to plasmaSwitchboard without a check
 * @return the filled property set
 */
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
final servletProperties prop = new servletProperties();
final plasmaSwitchboard sb = (plasmaSwitchboard)env;
// read post for handle
// (empty string when there is no form data or no "handle" field)
String handle = (post == null) ? "" : post.get("handle", "");
// NOTE(review): the statement(s) guarded by this "deleteprofile" check are
// not present in this copy; only the comment below survives.
if ((post != null) && (post.containsKey("deleteprofile"))) {
// deletion of a crawl
// generate handle list
int count = 0;
Iterator it = sb.profiles.profiles(true);
entry selentry;
while (it.hasNext()) {
selentry = (entry)it.next();
// NOTE(review): the condition below ends in a dangling "||" and runs straight
// into the prop.put call; the last operand and the statement it guards are
// missing here. The names tested are the proxy, remote and snippet-text
// profile constants of plasmaSwitchboard.
if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
prop.put("profiles_" + count + "_name", selentry.name());
prop.put("profiles_" + count + "_handle", selentry.handle());
// mark the list item whose handle matches the one sent with the request
if (handle.equals(selentry.handle()))
prop.put("profiles_" + count + "_selected", 1);
// NOTE(review): no increment of count is visible inside the loop above, so
// as written every item would use index 0 and "profiles" would be 0.
prop.put("profiles", count);
// look up the profile that belongs to the requested handle; the null checks
// further down show that the result may be null
selentry = sb.profiles.getEntry(handle);
// read post for change submit
if ((post != null) && (selentry != null)) {
if (post.containsKey("submit")) {
try {
it = labels.iterator();
eentry tee;
while (it.hasNext()) {
tee = (eentry) it.next();
// cval: value currently stored in the profile for this attribute
String cval = (String) selentry.map().get(tee.name);
// BOOLEAN attributes are taken from the mere presence of the field in the
// form; all other attributes fall back to the current value when the field
// is absent
String val = (tee.type == eentry.BOOLEAN) ? Boolean.toString(post.containsKey(tee.name)) : post.get(tee.name, cval);
// only write back values that actually changed
// NOTE(review): if the profile map holds no value for tee.name, cval is null
// and cval.equals(val) throws a NullPointerException -- confirm whether
// map() can lack any of the keys listed in labels.
if (!cval.equals(val)) selentry.changeEntry(tee.name, val);
} catch (IOException ex) {
// report the failure to the page instead of propagating it
prop.put("error", 1);
prop.put("error_message", ex.getMessage());
// generate crawl profile table
count = 0;
// upper bound for the number of domain names listed per profile
int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
it = sb.profiles.profiles(true);
plasmaCrawlProfile.entry profile;
// toggled once per profile and written to the "_dark" key
boolean dark = true;
while (it.hasNext()) {
profile = (plasmaCrawlProfile.entry) it.next();
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_name", profile.name());
prop.put("crawlProfiles_"+count+"_startURL", profile.startURL());
prop.put("crawlProfiles_"+count+"_handle", profile.handle());
prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth());
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
// Long.MAX_VALUE / Integer.MAX_VALUE are shown as words instead of numbers
prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder());
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth()));
//start contrib [MN]
// list the profile's domain names until domName returns "" or the index
// exceeds domlistlength; the entry at index domlistlength gets " ..." appended
// NOTE(review): no increment of i is visible in this loop; as written it
// would not terminate for a profile with at least one domain name.
int i = 0;
String item;
while((i <= domlistlength) && !((item = profile.domName(true, i)).equals(""))){
if(i == domlistlength){
item = item + " ...";
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent_"+i+"_item", item);
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", i);
//end contrib [MN]
prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages());
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_indexText", ((profile.indexText()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_indexMedia", ((profile.indexMedia()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_remoteIndexing", ((profile.remoteIndexing()) ? 1 : 0));
// delete button is switched off (0) for the profiles named "remote", "proxy",
// "snippetText" and "snippetMedia", and on (1) for every other profile
prop.put("crawlProfiles_"+count+"_deleteButton", (((profile.name().equals("remote")) ||
(profile.name().equals("proxy")) ||
(profile.name().equals("snippetText")) ||
(profile.name().equals("snippetMedia")) ? 0 : 1)));
prop.put("crawlProfiles_"+count+"_deleteButton_handle", profile.handle());
dark = !dark;
// NOTE(review): as in the first loop, no increment of count is visible.
prop.put("crawlProfiles", count);
// generate edit field
// "edit" is 0 when no profile matched the handle, otherwise 1 plus the
// per-attribute entries below
if (selentry == null) {
prop.put("edit", 0);
} else {
prop.put("edit", 1);
prop.put("edit_name", selentry.name());
prop.put("edit_handle", selentry.handle());
it = labels.iterator();
count = 0;
while (it.hasNext()) {
eentry ee = (eentry) it.next();
// current value of this attribute in the selected profile
Object val = selentry.map().get(ee.name);
prop.put("edit_entries_" + count + "_readonly", ee.readonly ? 1 : 0);
prop.put("edit_entries_" + count + "_readonly_name", ee.name);
prop.put("edit_entries_" + count + "_readonly_label", ee.label);
prop.put("edit_entries_" + count + "_readonly_type", ee.type);
// BOOLEAN attributes are passed as a 0/1 "checked" flag, all others as
// their raw value
if (ee.type == eentry.BOOLEAN) {
prop.put("edit_entries_" + count + "_readonly_type_checked", Boolean.valueOf((String) val).booleanValue() ? 1 : 0);
} else {
prop.put("edit_entries_" + count + "_readonly_type_value", val);
// NOTE(review): no increment of count is visible in the loop above either.
prop.put("edit_entries", count);
return prop;