diff --git a/htroot/ConfigRobotsTxt_p.html b/htroot/ConfigRobotsTxt_p.html index e723d0462..472e85d1a 100644 --- a/htroot/ConfigRobotsTxt_p.html +++ b/htroot/ConfigRobotsTxt_p.html @@ -9,14 +9,17 @@ #%env/templates/submenuConfig.template%#

Exclude Web-Spiders

+ Here you can set robots.txt-settings for all webcrawlers that try to access your peer. + robots.txt is a voluntary agreement most search-engines (including YaCy) follow. + It disallows crawlers from accessing webpages or even entire domains.

#(error)# ::

Unable to access the local file: #[msg]#

::

Deletion of htroot/robots.txt failed

#(/error)#
- - -
Restrict access to +
Restrict access for + +
diff --git a/htroot/ConfigRobotsTxt_p.java b/htroot/ConfigRobotsTxt_p.java index c37e45ae7..0f0062322 100644 --- a/htroot/ConfigRobotsTxt_p.java +++ b/htroot/ConfigRobotsTxt_p.java @@ -48,157 +48,56 @@ // javac -classpath .:../classes ConfigRobotsTxt_p.java // if the shell's current path is HTROOT -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; import de.anomic.http.httpHeader; +import de.anomic.http.httpdRobotsTxtConfig; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.servletProperties; -import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; public class ConfigRobotsTxt_p { public static final Pattern entryBeginPattern = Pattern.compile("# (\\w*) \\((\\d*) entries\\)"); - private static HashMap disallowMap = null; - - private static Map getDisallowMap(String htrootPath) { - if (disallowMap == null) { - final File htroot = new File(htrootPath); - if (!htroot.exists()) return null; - disallowMap = new /* */ HashMap(); - final ArrayList htrootFiles = new ArrayList(); - final ArrayList htrootDirs = new ArrayList(); - final String[] htroots = htroot.list(); - File file; - for (int i=0, dot; i