- improved version of robots.txt (delete your old htroot/robots.txt before updating):

- robots.txt is a servlet now
  - no need to rewrite the whole file each time a section is added or removed
  - user-defined disallows, added manually, won't be overwritten anymore
- new config-setting: httpd.robots.txt, holding names of the disallowed sections

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3423 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
karlchenofhell 2007-03-02 01:19:38 +00:00
parent 9623bf7bbe
commit 88245e44d8
8 changed files with 406 additions and 133 deletions

View File

@ -9,14 +9,17 @@
#%env/templates/submenuConfig.template%# #%env/templates/submenuConfig.template%#
<h2>Exclude Web-Spiders</h2> <h2>Exclude Web-Spiders</h2>
<p> <p>
Here you can set robots.txt-settings for all webcrawlers that try to access your peer.
<span class="tt">robots.txt</span> is a voluntary agreement most search-engines (including YaCy) follow.
It forbids crawlers from accessing webpages or even entire domains.
</p> </p>
#(error)# #(error)#
::<p class="error">Unable to access the local file: <span class="error tt">#[msg]#</span></p> ::<p class="error">Unable to access the local file: <span class="error tt">#[msg]#</span></p>
::<p class="error">Deletion of <span class="error tt">htroot/robots.txt</span> failed</p>#(/error)# ::<p class="error">Deletion of <span class="error tt">htroot/robots.txt</span> failed</p>#(/error)#
<form method="post" action="ConfigRobotsTxt_p.html" enctype="multipart/form-data" accept-charset="UTF-8"> <form method="post" action="ConfigRobotsTxt_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
<input type="hidden" name="directories" value="checked" /> <fieldset><legend>Restrict access for</legend>
<input type="hidden" name="locked" value="checked" /> <input type="hidden" name="directories" value="checked" />
<fieldset><legend>Restrict access to</legend> <input type="hidden" name="locked" value="checked" />
<dl> <dl>
<dt><label for="all"><em>Entire Peer</em></label></dt> <dt><label for="all"><em>Entire Peer</em></label></dt>
<dd><input type="checkbox" id="all" name="all"#(all.checked)#:: checked="checked"#(/all.checked)# /></dd> <dd><input type="checkbox" id="all" name="all"#(all.checked)#:: checked="checked"#(/all.checked)# /></dd>

View File

@ -48,157 +48,56 @@
// javac -classpath .:../classes ConfigRobotsTxt_p.java // javac -classpath .:../classes ConfigRobotsTxt_p.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.http.httpdRobotsTxtConfig;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties; import de.anomic.server.servletProperties;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyCore;
public class ConfigRobotsTxt_p { public class ConfigRobotsTxt_p {
public static final Pattern entryBeginPattern = Pattern.compile("# (\\w*) \\((\\d*) entries\\)"); public static final Pattern entryBeginPattern = Pattern.compile("# (\\w*) \\((\\d*) entries\\)");
private static HashMap disallowMap = null;
private static Map getDisallowMap(String htrootPath) {
if (disallowMap == null) {
final File htroot = new File(htrootPath);
if (!htroot.exists()) return null;
disallowMap = new /* <String,String[]> */ HashMap();
final ArrayList htrootFiles = new ArrayList();
final ArrayList htrootDirs = new ArrayList();
final String[] htroots = htroot.list();
File file;
for (int i=0, dot; i<htroots.length; i++) {
if (htroots[i].equals("www")) continue;
file = new File(htroot, htroots[i]);
if (file.isDirectory()) {
htrootDirs.add("/" + file.getName());
} else if (
(dot = htroots[i].lastIndexOf('.')) < 2 ||
htroots[i].charAt(dot - 2) == '_' && htroots[i].charAt(dot - 1) == 'p'
) {
htrootFiles.add("/" + file.getName());
}
}
disallowMap.put("all", new String[] { "/" } );
disallowMap.put("locked", htrootFiles.toArray(new String[htrootFiles.size()]));
disallowMap.put("directories", htrootDirs.toArray(new String[htrootDirs.size()]));
disallowMap.put("blog", new String[] {
"/Blog.html",
"/Blog.xml",
"/BlogComments.html" } );
disallowMap.put("wiki", new String[] { "/Wiki.html" } );
disallowMap.put("bookmarks", new String[] { "/Bookmarks.html" } );
disallowMap.put("homepage", new String[] { "/www" } );
disallowMap.put("fileshare", new String[] { "/share" } );
disallowMap.put("surftips", new String[] { "/Surftips.html" } );
disallowMap.put("news", new String[] { "/News.html" } );
disallowMap.put("status", new String[] { "/Status.html" } );
disallowMap.put("network", new String[] {
"/Network.html",
"/Network.xml",
"/Network.csv" } );
}
return disallowMap;
}
public static servletProperties respond(httpHeader header, serverObjects post, serverSwitch env) { public static servletProperties respond(httpHeader header, serverObjects post, serverSwitch env) {
final servletProperties prop = new servletProperties(); final servletProperties prop = new servletProperties();
prop.put("address", yacyCore.seedDB.mySeed.getAddress()); httpdRobotsTxtConfig rbc = ((plasmaSwitchboard)env).robotstxtConfig;
prop.put("clientname", yacyCore.seedDB.mySeed.getAddress());
final String htroot = ((plasmaSwitchboard)env).getConfig(plasmaSwitchboard.HTROOT_PATH, plasmaSwitchboard.HTROOT_PATH_DEFAULT);
final File robots_txt = new File(htroot + File.separator + "robots.txt");
if (!robots_txt.exists()) try {
robots_txt.createNewFile();
} catch (IOException e) {
prop.put("error", 1);
prop.put("error_msg", e.getMessage());
}
if (post != null) { if (post != null) {
if (post.containsKey("save")) { if (post.containsKey("save")) {
try { rbc.setAllDisallowed(post.containsKey(httpdRobotsTxtConfig.ALL));
if (robots_txt.delete() && robots_txt.createNewFile()) { rbc.setBlogDisallowed(post.containsKey(httpdRobotsTxtConfig.BLOG));
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(robots_txt))); rbc.setBookmarksDisallowed(post.containsKey(httpdRobotsTxtConfig.BOOKMARKS));
printHeader(out); rbc.setDirsDisallowed(post.containsKey(httpdRobotsTxtConfig.DIRS));
rbc.setFileshareDisallowed(post.containsKey(httpdRobotsTxtConfig.FILESHARE));
final Iterator it = getDisallowMap(htroot).entrySet().iterator(); rbc.setHomepageDisallowed(post.containsKey(httpdRobotsTxtConfig.HOMEPAGE));
Map.Entry entry; rbc.setLockedDisallowed(post.containsKey(httpdRobotsTxtConfig.LOCKED));
while (it.hasNext()) { rbc.setNetworkDisallowed(post.containsKey(httpdRobotsTxtConfig.NETWORK));
entry = (Map.Entry)it.next(); rbc.setNewsDisallowed(post.containsKey(httpdRobotsTxtConfig.NEWS));
if (post.containsKey(entry.getKey())) { rbc.setStatusDisallowed(post.containsKey(httpdRobotsTxtConfig.STATUS));
out.println(); rbc.setSurftipsDisallowed(post.containsKey(httpdRobotsTxtConfig.SURFTIPS));
printEntry(out, entry); rbc.setWikiDisallowed(post.containsKey(httpdRobotsTxtConfig.WIKI));
} ((plasmaSwitchboard)env).setConfig(plasmaSwitchboard.ROBOTS_TXT, rbc.toString());
}
out.flush();
out.close();
} else {
prop.put("error", 2);
}
} catch (IOException e) {
serverLog.logSevere("ROBOTS.TXT", "Error writing " + robots_txt, e);
prop.put("error", 1);
prop.put("error_msg", e.getMessage());
}
} }
} }
// read htroot/robots.txt prop.put(httpdRobotsTxtConfig.ALL + ".checked", (rbc.isAllDisallowed()) ? 1 : 0);
try { prop.put(httpdRobotsTxtConfig.BLOG + ".checked", (rbc.isBlogDisallowed()) ? 1 : 0);
BufferedReader br = new BufferedReader(new FileReader(robots_txt)); prop.put(httpdRobotsTxtConfig.BOOKMARKS + ".checked", (rbc.isBookmarksDisallowed()) ? 1 : 0);
String line; prop.put(httpdRobotsTxtConfig.DIRS + ".checked", (rbc.isDirsDisallowed()) ? 1 : 0);
Matcher m; prop.put(httpdRobotsTxtConfig.FILESHARE + ".checked", (rbc.isFileshareDisallowed()) ? 1 : 0);
while ((line = br.readLine()) != null) { prop.put(httpdRobotsTxtConfig.HOMEPAGE + ".checked", (rbc.isHomepageDisallowed()) ? 1 : 0);
if ((m = entryBeginPattern.matcher(line)).matches()) prop.put(httpdRobotsTxtConfig.LOCKED + ".checked", (rbc.isLockedDisallowed()) ? 1 : 0);
prop.put(m.group(1) + ".checked", 1); prop.put(httpdRobotsTxtConfig.NETWORK + ".checked", (rbc.isNetworkDisallowed()) ? 1 : 0);
} prop.put(httpdRobotsTxtConfig.NEWS + ".checked", (rbc.isNewsDisallowed()) ? 1 : 0);
} catch (IOException e) { prop.put(httpdRobotsTxtConfig.STATUS + ".checked", (rbc.isStatusDisallowed()) ? 1 : 0);
prop.put("error", 1); prop.put(httpdRobotsTxtConfig.SURFTIPS + ".checked", (rbc.isSurftipsDisallowed()) ? 1 : 0);
prop.put("error_msg", e.getMessage()); prop.put(httpdRobotsTxtConfig.WIKI + ".checked", (rbc.isWikiDisallowed()) ? 1 : 0);
}
return prop; return prop;
} }
private static void printHeader(PrintWriter out) {
out.print("# robots.txt for ");
out.print(yacyCore.seedDB.mySeed.getName());
out.println(".yacy");
out.println();
out.println("User-agent: *");
}
private static void printEntry(PrintWriter out, Map.Entry entry) {
String[] disallows = (String[])entry.getValue();
out.print("# ");
out.print(entry.getKey());
out.print(" (");
out.print(disallows.length);
out.println(" entries)");
for (int i=0; i<disallows.length; i++) {
out.print("Disallow: ");
out.println(disallows[i]);
}
}
} }

71
htroot/robots.java Normal file
View File

@ -0,0 +1,71 @@
import java.io.File;
import java.util.ArrayList;
import de.anomic.http.httpHeader;
import de.anomic.http.httpdRobotsTxtConfig;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
/**
 * Servlet that fills the properties for the <code>robots.txt</code> template.
 *
 * The template wraps all section-specific output in an <code>#(all)#</code>
 * switch: alternative 1 emits a single <code>Disallow: /</code>, alternative 0
 * contains the individual sections. Every property that belongs to alternative
 * 0 therefore has to carry the <code>all_</code> prefix.
 */
public class robots {

    /** Prefix of every property nested inside the template's <code>#(all)#</code> switch. */
    private static final String NESTED = httpdRobotsTxtConfig.ALL + "_";

    /**
     * Builds the template properties from the peer's robots.txt configuration.
     *
     * @param header the request header (not used)
     * @param post   the request parameters (not used)
     * @param env    the switchboard; must be a {@link plasmaSwitchboard}
     * @return the properties consumed by the robots.txt template
     */
    public static servletProperties respond(httpHeader header, serverObjects post, serverSwitch env) {
        final servletProperties prop = new servletProperties();
        final httpdRobotsTxtConfig rbc = ((plasmaSwitchboard)env).robotstxtConfig;

        if (rbc.isAllDisallowed()) {
            // selects the "Disallow: /" alternative, nothing else is rendered
            prop.put(httpdRobotsTxtConfig.ALL, 1);
            return prop;
        }

        if (rbc.isBlogDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.BLOG, 1);
        if (rbc.isBookmarksDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.BOOKMARKS, 1);
        if (rbc.isFileshareDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.FILESHARE, 1);
        if (rbc.isHomepageDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.HOMEPAGE, 1);
        if (rbc.isNetworkDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.NETWORK, 1);
        if (rbc.isNewsDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.NEWS, 1);
        if (rbc.isStatusDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.STATUS, 1);
        if (rbc.isSurftipsDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.SURFTIPS, 1);
        if (rbc.isWikiDisallowed()) prop.put(NESTED + httpdRobotsTxtConfig.WIKI, 1);

        if (rbc.isLockedDisallowed() || rbc.isDirsDisallowed()) {
            final ArrayList[] p = getFiles(env.getConfig(plasmaSwitchboard.HTROOT_PATH, plasmaSwitchboard.HTROOT_PATH_DEFAULT));
            if (rbc.isLockedDisallowed()) {
                // the loop count must use the same "all_" prefix as its entries,
                // otherwise the nested #{locked}# loop never sees it
                prop.put(NESTED + httpdRobotsTxtConfig.LOCKED, p[0].size());
                for (int i=0; i<p[0].size(); i++) {
                    prop.put(NESTED + httpdRobotsTxtConfig.LOCKED + "_" + i + "_page", p[0].get(i));
                }
            }
            if (rbc.isDirsDisallowed()) {
                prop.put(NESTED + httpdRobotsTxtConfig.DIRS, p[1].size());
                for (int i=0; i<p[1].size(); i++) {
                    prop.put(NESTED + httpdRobotsTxtConfig.DIRS + "_" + i + "_dir", p[1].get(i));
                }
            }
        }

        return prop;
    }

    /**
     * Scans the htroot directory for entries to be listed in robots.txt.
     *
     * Names are returned without a leading slash because the template already
     * writes <code>Disallow: /</code> in front of each of them.
     *
     * @param htrootPath path of the htroot directory
     * @return a two-element array: index 0 holds the names of the "locked"
     *         files, index 1 the names of the sub-directories (without
     *         <code>www</code>); both lists are empty if the directory cannot
     *         be listed
     */
    private static ArrayList[] getFiles(String htrootPath) {
        final ArrayList htrootFiles = new ArrayList();
        final ArrayList htrootDirs = new ArrayList();
        final File htroot = new File(htrootPath);

        // File.list() yields null for a missing path or a non-directory
        final String[] htroots = htroot.list();
        if (htroots != null) {
            for (int i=0, dot; i<htroots.length; i++) {
                // "www" is skipped here; the template's homepage section emits /www itself
                if (htroots[i].equals("www")) continue;
                if (new File(htroot, htroots[i]).isDirectory()) {
                    htrootDirs.add(htroots[i]);
                } else if (
                        // NOTE(review): names whose last '.' is at index < 2 (including
                        // names without any '.') are also treated as locked - confirm
                        // that this is intended and not only a bounds guard
                        (dot = htroots[i].lastIndexOf('.')) < 2 ||
                        htroots[i].charAt(dot - 2) == '_' && htroots[i].charAt(dot - 1) == 'p'
                ) {
                    htrootFiles.add(htroots[i]);
                }
            }
        }
        return new ArrayList[] { htrootFiles, htrootDirs };
    }
}

54
htroot/robots.txt Normal file
View File

@ -0,0 +1,54 @@
# robots.txt for #[clientname]#.yacy
User-agent: *
#(all)#
#{dirs}#
# dirs
Disallow: /#[dir]##{/dirs}#
#{locked}#
# locked
Disallow: /#[page]##{/locked}#
#(wiki)#::
# wiki
Disallow: /Wiki.html#(/wiki)#
#(blog)#::
# blog
Disallow: /Blog.html
Disallow: /Blog.rss
Disallow: /Blog.xml#(/blog)#
#(news)#::
# news
Disallow: /News.html#(/news)#
#(status)#::
# status
Disallow: /Status.html#(/status)#
#(network)#::
# network
Disallow: /Network.html
Disallow: /Network.csv
Disallow: /Network.xml#(/network)#
#(homepage)#::
# homepage
Disallow: /www#(/homepage)#
#(fileshare)#::
# fileshare
Disallow: /share#(/fileshare)#
#(surftips)#::
# surftips
Disallow: /Surftips.html#(/surftips)#
::
# all
Disallow: /
#(/all)#

View File

@ -592,7 +592,8 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
path.endsWith("csv") || path.endsWith("csv") ||
path.endsWith("pac") || path.endsWith("pac") ||
path.endsWith("src") || path.endsWith("src") ||
path.endsWith("/")) { path.endsWith("/") ||
path.equals("/robots.txt")) {
/*targetFile = getLocalizedFile(path); /*targetFile = getLocalizedFile(path);
if (!(targetFile.exists())) { if (!(targetFile.exists())) {

View File

@ -0,0 +1,220 @@
// httpdRobotsTxtConfig.java
// ---------
// part of YaCy
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2007
// Created 22.02.2007
//
// This file is contributed by Franz Brauße
//
// $LastChangedDate: $
// $LastChangedRevision: $
// $LastChangedBy: $
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.http;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverSwitch;
/**
 * Holds which sections of the peer are disallowed for crawlers in the locally
 * served robots.txt.
 *
 * The configuration is persisted as a comma-separated list of the section
 * names defined by the constants of this class (see {@link #toString()} and
 * {@link #init(serverSwitch)}).
 */
public final class httpdRobotsTxtConfig {

    public static final String WIKI = "wiki";
    public static final String BLOG = "blog";
    public static final String BOOKMARKS = "bookmarks";
    public static final String HOMEPAGE = "homepage";
    public static final String FILESHARE = "fileshare";
    public static final String SURFTIPS = "surftips";
    public static final String NEWS = "news";
    public static final String STATUS = "status";
    public static final String LOCKED = "locked";
    public static final String DIRS = "dirs";
    public static final String NETWORK = "network";
    public static final String ALL = "all";

    private boolean allDisallowed = false;
    // "locked" and "dirs" are the standard settings when nothing is configured
    private boolean lockedDisallowed = true;
    private boolean dirsDisallowed = true;
    private boolean wikiDisallowed = false;
    private boolean blogDisallowed = false;
    private boolean fileshareDisallowed = false;
    private boolean homepageDisallowed = false;
    private boolean newsDisallowed = false;
    private boolean statusDisallowed = false;
    private boolean networkDisallowed = false;
    private boolean surftipsDisallowed = false;
    private boolean bookmarksDisallowed = false;

    /** Creates a configuration with the standard settings (locked and dirs disallowed). */
    public httpdRobotsTxtConfig() { }

    /**
     * Creates a configuration in which exactly the given sections are disallowed.
     *
     * @param active names of the disallowed sections; <code>null</code> keeps
     *               the standard settings, <code>null</code> elements and
     *               unknown names are ignored
     */
    public httpdRobotsTxtConfig(String[] active) {
        if (active == null) return;

        // An explicit list is authoritative: without this reset a saved
        // configuration that omits "locked" or "dirs" would silently get them
        // back from the field defaults on the next start.
        this.lockedDisallowed = false;
        this.dirsDisallowed = false;

        for (int i=0; i<active.length; i++) {
            if (active[i] == null) continue;
            final String section = active[i].trim();
            if (section.equals(BLOG)) { this.blogDisallowed = true; continue; }
            if (section.equals(WIKI)) { this.wikiDisallowed = true; continue; }
            if (section.equals(BOOKMARKS)) { this.bookmarksDisallowed = true; continue; }
            if (section.equals(HOMEPAGE)) { this.homepageDisallowed = true; continue; }
            if (section.equals(FILESHARE)) { this.fileshareDisallowed = true; continue; }
            if (section.equals(SURFTIPS)) { this.surftipsDisallowed = true; continue; }
            if (section.equals(NEWS)) { this.newsDisallowed = true; continue; }
            if (section.equals(STATUS)) { this.statusDisallowed = true; continue; }
            if (section.equals(NETWORK)) { this.networkDisallowed = true; continue; }
            if (section.equals(LOCKED)) { this.lockedDisallowed = true; continue; }
            if (section.equals(DIRS)) { this.dirsDisallowed = true; continue; }
            if (section.equals(ALL)) { this.allDisallowed = true; continue; }
        }
    }

    /**
     * Reads the robots.txt configuration from the given environment.
     *
     * @param env the switchboard providing the {@link plasmaSwitchboard#ROBOTS_TXT} setting
     * @return the parsed configuration, or the standard settings if the
     *         setting is not available
     */
    public static httpdRobotsTxtConfig init(serverSwitch env) {
        String cfg = env.getConfig(plasmaSwitchboard.ROBOTS_TXT, plasmaSwitchboard.ROBOTS_TXT_DEFAULT);
        if (cfg == null) return new httpdRobotsTxtConfig();
        // accept commas and/or whitespace as separators, so that a
        // blank-separated value is parsed the same way as "locked,dirs"
        return new httpdRobotsTxtConfig(cfg.trim().split("[,\\s]+"));
    }

    /**
     * Serializes this configuration into the format accepted by
     * {@link #init(serverSwitch)}.
     *
     * @return {@link #ALL} if the entire peer is disallowed, otherwise the
     *         comma-separated names of the disallowed sections (possibly empty)
     */
    public String toString() {
        if (this.allDisallowed) return ALL;
        StringBuffer sb = new StringBuffer();
        appendSection(sb, this.blogDisallowed, BLOG);
        appendSection(sb, this.bookmarksDisallowed, BOOKMARKS);
        appendSection(sb, this.dirsDisallowed, DIRS);
        appendSection(sb, this.fileshareDisallowed, FILESHARE);
        appendSection(sb, this.homepageDisallowed, HOMEPAGE);
        appendSection(sb, this.lockedDisallowed, LOCKED);
        appendSection(sb, this.networkDisallowed, NETWORK);
        appendSection(sb, this.newsDisallowed, NEWS);
        appendSection(sb, this.statusDisallowed, STATUS);
        appendSection(sb, this.surftipsDisallowed, SURFTIPS);
        appendSection(sb, this.wikiDisallowed, WIKI);
        return sb.toString();
    }

    /** Appends the section name, preceded by a comma if it is not the first entry. */
    private static void appendSection(StringBuffer sb, boolean disallowed, String section) {
        if (!disallowed) return;
        if (sb.length() > 0) sb.append(',');
        sb.append(section);
    }

    // Except for isAllDisallowed(), every getter below also reports true while
    // the entire peer is disallowed; the setters only change the section's own flag.

    public boolean isAllDisallowed() {
        return allDisallowed;
    }

    public void setAllDisallowed(boolean allDisallowed) {
        this.allDisallowed = allDisallowed;
    }

    public boolean isLockedDisallowed() {
        return lockedDisallowed || this.allDisallowed;
    }

    public void setLockedDisallowed(boolean lockedDisallowed) {
        this.lockedDisallowed = lockedDisallowed;
    }

    public boolean isDirsDisallowed() {
        return dirsDisallowed || this.allDisallowed;
    }

    public void setDirsDisallowed(boolean dirsDisallowed) {
        this.dirsDisallowed = dirsDisallowed;
    }

    public boolean isBlogDisallowed() {
        return blogDisallowed || this.allDisallowed;
    }

    public void setBlogDisallowed(boolean blogDisallowed) {
        this.blogDisallowed = blogDisallowed;
    }

    public boolean isBookmarksDisallowed() {
        return bookmarksDisallowed || this.allDisallowed;
    }

    public void setBookmarksDisallowed(boolean bookmarksDisallowed) {
        this.bookmarksDisallowed = bookmarksDisallowed;
    }

    public boolean isFileshareDisallowed() {
        return fileshareDisallowed || this.allDisallowed;
    }

    public void setFileshareDisallowed(boolean fileshareDisallowed) {
        this.fileshareDisallowed = fileshareDisallowed;
    }

    public boolean isHomepageDisallowed() {
        return homepageDisallowed || this.allDisallowed;
    }

    public void setHomepageDisallowed(boolean homepageDisallowed) {
        this.homepageDisallowed = homepageDisallowed;
    }

    public boolean isNetworkDisallowed() {
        return networkDisallowed || this.allDisallowed;
    }

    public void setNetworkDisallowed(boolean networkDisallowed) {
        this.networkDisallowed = networkDisallowed;
    }

    public boolean isNewsDisallowed() {
        return newsDisallowed || this.allDisallowed;
    }

    public void setNewsDisallowed(boolean newsDisallowed) {
        this.newsDisallowed = newsDisallowed;
    }

    public boolean isStatusDisallowed() {
        return statusDisallowed || this.allDisallowed;
    }

    public void setStatusDisallowed(boolean statusDisallowed) {
        this.statusDisallowed = statusDisallowed;
    }

    public boolean isSurftipsDisallowed() {
        return surftipsDisallowed || this.allDisallowed;
    }

    public void setSurftipsDisallowed(boolean surftipsDisallowed) {
        this.surftipsDisallowed = surftipsDisallowed;
    }

    public boolean isWikiDisallowed() {
        return wikiDisallowed || this.allDisallowed;
    }

    public void setWikiDisallowed(boolean wikiDisallowed) {
        this.wikiDisallowed = wikiDisallowed;
    }
}

View File

@ -132,6 +132,7 @@ import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc; import de.anomic.http.httpc;
import de.anomic.http.httpd; import de.anomic.http.httpd;
import de.anomic.http.httpdRobotsTxtConfig;
import de.anomic.index.indexContainer; import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexRWIEntryNew;
@ -262,6 +263,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// public String[] remoteProxyNoProxyPatterns = null; // public String[] remoteProxyNoProxyPatterns = null;
public httpRemoteProxyConfig remoteProxyConfig = null; public httpRemoteProxyConfig remoteProxyConfig = null;
public httpdRobotsTxtConfig robotstxtConfig = null;
/* /*
* Some constants * Some constants
@ -652,6 +655,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public static final String HTTPC_NAME_CACHE_CACHING_PATTERNS_NO = "httpc.nameCacheNoCachingPatterns"; public static final String HTTPC_NAME_CACHE_CACHING_PATTERNS_NO = "httpc.nameCacheNoCachingPatterns";
/** Name of the setting that holds the sections disallowed in the peer's robots.txt. */
public static final String ROBOTS_TXT = "httpd.robots.txt";
/** Default for {@link #ROBOTS_TXT}; comma-separated, because the value is parsed by splitting on ','. */
public static final String ROBOTS_TXT_DEFAULT = httpdRobotsTxtConfig.LOCKED + "," + httpdRobotsTxtConfig.DIRS;
////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////
// Lists // Lists
////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////
@ -875,6 +881,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
this.remoteProxyConfig = httpRemoteProxyConfig.init(this); this.remoteProxyConfig = httpRemoteProxyConfig.init(this);
this.log.logConfig("Remote proxy configuration:\n" + this.remoteProxyConfig.toString()); this.log.logConfig("Remote proxy configuration:\n" + this.remoteProxyConfig.toString());
// set up local robots.txt
this.robotstxtConfig = httpdRobotsTxtConfig.init(this);
// setting timestamp of last proxy access // setting timestamp of last proxy access
this.proxyLastAccess = System.currentTimeMillis() - 60000; this.proxyLastAccess = System.currentTimeMillis() - 60000;
crg = new StringBuffer(maxCRGDump); crg = new StringBuffer(maxCRGDump);

View File

@ -869,3 +869,19 @@ rankingProfile =
#optional extern thumbnail program. #optional extern thumbnail program.
#the program must accept the invocation PROGRAM http://url /path/to/filename #the program must accept the invocation PROGRAM http://url /path/to/filename
thumbnailProgram = thumbnailProgram =
# settings for the peer's local robots.txt
# the following restrictions are possible (comma-separated):
# - all : entire domain is disallowed
# - blog : the blog-pages
# - bookmarks : the bookmark-page
# - dirs : all directories in htroot (standard setting, as they contain no usable information)
# - fileshare : all files in the peer's file share (DATA/HTDOCS/share)
# - homepage : all files on the peer's home page (DATA/HTDOCS/www)
# - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyway)
# - news : the news-page
# - network : the network-pages
# - status : peer's status page
# - surftips : the surftips-page
# - wiki : the wiki-page
httpd.robots.txt = locked,dirs