mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- improved version of robots.txt (delete your old htroot/robots.txt before updating):
- robots.txt is a servlet now - no need to rewrite the whole file each time a section is added or removed - user-defined disallows, added manually, won't be overwritten anymore - new config-setting: httpd.robots.txt, holding names of the disallowed sections git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3423 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
9623bf7bbe
commit
88245e44d8
|
@ -9,14 +9,17 @@
|
|||
#%env/templates/submenuConfig.template%#
|
||||
<h2>Exclude Web-Spiders</h2>
|
||||
<p>
|
||||
Here you can set robots.txt-settings for all webcrawlers that try to access your peer.
|
||||
<span class="tt">robots.txt</span> is a voluntary agreement that most search engines (including YaCy) follow.
|
||||
It disallows crawlers from accessing webpages or even entire domains.
|
||||
</p>
|
||||
#(error)#
|
||||
::<p class="error">Unable to access the local file: <span class="error tt">#[msg]#</span></p>
|
||||
::<p class="error">Deletion of <span class="error tt">htroot/robots.txt</span> failed</p>#(/error)#
|
||||
<form method="post" action="ConfigRobotsTxt_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
|
||||
<input type="hidden" name="directories" value="checked" />
|
||||
<input type="hidden" name="locked" value="checked" />
|
||||
<fieldset><legend>Restrict access to</legend>
|
||||
<fieldset><legend>Restrict access for</legend>
|
||||
<input type="hidden" name="directories" value="checked" />
|
||||
<input type="hidden" name="locked" value="checked" />
|
||||
<dl>
|
||||
<dt><label for="all"><em>Entire Peer</em></label></dt>
|
||||
<dd><input type="checkbox" id="all" name="all"#(all.checked)#:: checked="checked"#(/all.checked)# /></dd>
|
||||
|
|
|
@ -48,157 +48,56 @@
|
|||
// javac -classpath .:../classes ConfigRobotsTxt_p.java
|
||||
// if the shell's current path is HTROOT
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.http.httpdRobotsTxtConfig;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.server.servletProperties;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
|
||||
public class ConfigRobotsTxt_p {
|
||||
|
||||
public static final Pattern entryBeginPattern = Pattern.compile("# (\\w*) \\((\\d*) entries\\)");
|
||||
|
||||
private static HashMap disallowMap = null;
|
||||
|
||||
private static Map getDisallowMap(String htrootPath) {
|
||||
if (disallowMap == null) {
|
||||
final File htroot = new File(htrootPath);
|
||||
if (!htroot.exists()) return null;
|
||||
disallowMap = new /* <String,String[]> */ HashMap();
|
||||
final ArrayList htrootFiles = new ArrayList();
|
||||
final ArrayList htrootDirs = new ArrayList();
|
||||
final String[] htroots = htroot.list();
|
||||
File file;
|
||||
for (int i=0, dot; i<htroots.length; i++) {
|
||||
if (htroots[i].equals("www")) continue;
|
||||
file = new File(htroot, htroots[i]);
|
||||
if (file.isDirectory()) {
|
||||
htrootDirs.add("/" + file.getName());
|
||||
} else if (
|
||||
(dot = htroots[i].lastIndexOf('.')) < 2 ||
|
||||
htroots[i].charAt(dot - 2) == '_' && htroots[i].charAt(dot - 1) == 'p'
|
||||
) {
|
||||
htrootFiles.add("/" + file.getName());
|
||||
}
|
||||
}
|
||||
|
||||
disallowMap.put("all", new String[] { "/" } );
|
||||
disallowMap.put("locked", htrootFiles.toArray(new String[htrootFiles.size()]));
|
||||
disallowMap.put("directories", htrootDirs.toArray(new String[htrootDirs.size()]));
|
||||
disallowMap.put("blog", new String[] {
|
||||
"/Blog.html",
|
||||
"/Blog.xml",
|
||||
"/BlogComments.html" } );
|
||||
disallowMap.put("wiki", new String[] { "/Wiki.html" } );
|
||||
disallowMap.put("bookmarks", new String[] { "/Bookmarks.html" } );
|
||||
disallowMap.put("homepage", new String[] { "/www" } );
|
||||
disallowMap.put("fileshare", new String[] { "/share" } );
|
||||
disallowMap.put("surftips", new String[] { "/Surftips.html" } );
|
||||
disallowMap.put("news", new String[] { "/News.html" } );
|
||||
disallowMap.put("status", new String[] { "/Status.html" } );
|
||||
disallowMap.put("network", new String[] {
|
||||
"/Network.html",
|
||||
"/Network.xml",
|
||||
"/Network.csv" } );
|
||||
}
|
||||
return disallowMap;
|
||||
}
|
||||
|
||||
public static servletProperties respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||
final servletProperties prop = new servletProperties();
|
||||
|
||||
prop.put("address", yacyCore.seedDB.mySeed.getAddress());
|
||||
|
||||
final String htroot = ((plasmaSwitchboard)env).getConfig(plasmaSwitchboard.HTROOT_PATH, plasmaSwitchboard.HTROOT_PATH_DEFAULT);
|
||||
final File robots_txt = new File(htroot + File.separator + "robots.txt");
|
||||
if (!robots_txt.exists()) try {
|
||||
robots_txt.createNewFile();
|
||||
} catch (IOException e) {
|
||||
prop.put("error", 1);
|
||||
prop.put("error_msg", e.getMessage());
|
||||
}
|
||||
httpdRobotsTxtConfig rbc = ((plasmaSwitchboard)env).robotstxtConfig;
|
||||
prop.put("clientname", yacyCore.seedDB.mySeed.getAddress());
|
||||
|
||||
if (post != null) {
|
||||
if (post.containsKey("save")) {
|
||||
try {
|
||||
if (robots_txt.delete() && robots_txt.createNewFile()) {
|
||||
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(robots_txt)));
|
||||
printHeader(out);
|
||||
|
||||
final Iterator it = getDisallowMap(htroot).entrySet().iterator();
|
||||
Map.Entry entry;
|
||||
while (it.hasNext()) {
|
||||
entry = (Map.Entry)it.next();
|
||||
if (post.containsKey(entry.getKey())) {
|
||||
out.println();
|
||||
printEntry(out, entry);
|
||||
}
|
||||
}
|
||||
out.flush();
|
||||
out.close();
|
||||
} else {
|
||||
prop.put("error", 2);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
serverLog.logSevere("ROBOTS.TXT", "Error writing " + robots_txt, e);
|
||||
prop.put("error", 1);
|
||||
prop.put("error_msg", e.getMessage());
|
||||
}
|
||||
rbc.setAllDisallowed(post.containsKey(httpdRobotsTxtConfig.ALL));
|
||||
rbc.setBlogDisallowed(post.containsKey(httpdRobotsTxtConfig.BLOG));
|
||||
rbc.setBookmarksDisallowed(post.containsKey(httpdRobotsTxtConfig.BOOKMARKS));
|
||||
rbc.setDirsDisallowed(post.containsKey(httpdRobotsTxtConfig.DIRS));
|
||||
rbc.setFileshareDisallowed(post.containsKey(httpdRobotsTxtConfig.FILESHARE));
|
||||
rbc.setHomepageDisallowed(post.containsKey(httpdRobotsTxtConfig.HOMEPAGE));
|
||||
rbc.setLockedDisallowed(post.containsKey(httpdRobotsTxtConfig.LOCKED));
|
||||
rbc.setNetworkDisallowed(post.containsKey(httpdRobotsTxtConfig.NETWORK));
|
||||
rbc.setNewsDisallowed(post.containsKey(httpdRobotsTxtConfig.NEWS));
|
||||
rbc.setStatusDisallowed(post.containsKey(httpdRobotsTxtConfig.STATUS));
|
||||
rbc.setSurftipsDisallowed(post.containsKey(httpdRobotsTxtConfig.SURFTIPS));
|
||||
rbc.setWikiDisallowed(post.containsKey(httpdRobotsTxtConfig.WIKI));
|
||||
((plasmaSwitchboard)env).setConfig(plasmaSwitchboard.ROBOTS_TXT, rbc.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// read htroot/robots.txt
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new FileReader(robots_txt));
|
||||
String line;
|
||||
Matcher m;
|
||||
while ((line = br.readLine()) != null) {
|
||||
if ((m = entryBeginPattern.matcher(line)).matches())
|
||||
prop.put(m.group(1) + ".checked", 1);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
prop.put("error", 1);
|
||||
prop.put("error_msg", e.getMessage());
|
||||
}
|
||||
|
||||
prop.put(httpdRobotsTxtConfig.ALL + ".checked", (rbc.isAllDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.BLOG + ".checked", (rbc.isBlogDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.BOOKMARKS + ".checked", (rbc.isBookmarksDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.DIRS + ".checked", (rbc.isDirsDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.FILESHARE + ".checked", (rbc.isFileshareDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.HOMEPAGE + ".checked", (rbc.isHomepageDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.LOCKED + ".checked", (rbc.isLockedDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.NETWORK + ".checked", (rbc.isNetworkDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.NEWS + ".checked", (rbc.isNewsDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.STATUS + ".checked", (rbc.isStatusDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.SURFTIPS + ".checked", (rbc.isSurftipsDisallowed()) ? 1 : 0);
|
||||
prop.put(httpdRobotsTxtConfig.WIKI + ".checked", (rbc.isWikiDisallowed()) ? 1 : 0);
|
||||
return prop;
|
||||
}
|
||||
|
||||
private static void printHeader(PrintWriter out) {
|
||||
out.print("# robots.txt for ");
|
||||
out.print(yacyCore.seedDB.mySeed.getName());
|
||||
out.println(".yacy");
|
||||
out.println();
|
||||
out.println("User-agent: *");
|
||||
}
|
||||
|
||||
private static void printEntry(PrintWriter out, Map.Entry entry) {
|
||||
String[] disallows = (String[])entry.getValue();
|
||||
out.print("# ");
|
||||
out.print(entry.getKey());
|
||||
out.print(" (");
|
||||
out.print(disallows.length);
|
||||
out.println(" entries)");
|
||||
|
||||
for (int i=0; i<disallows.length; i++) {
|
||||
out.print("Disallow: ");
|
||||
out.println(disallows[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
71
htroot/robots.java
Normal file
71
htroot/robots.java
Normal file
|
@ -0,0 +1,71 @@
|
|||
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.http.httpdRobotsTxtConfig;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.server.servletProperties;
|
||||
|
||||
public class robots {
|
||||
|
||||
public static servletProperties respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||
final servletProperties prop = new servletProperties();
|
||||
final httpdRobotsTxtConfig rbc = ((plasmaSwitchboard)env).robotstxtConfig;
|
||||
|
||||
if (rbc.isAllDisallowed()) {
|
||||
prop.put(httpdRobotsTxtConfig.ALL, 1);
|
||||
} else {
|
||||
if (rbc.isBlogDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.BLOG, 1);
|
||||
if (rbc.isBookmarksDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.BOOKMARKS, 1);
|
||||
if (rbc.isFileshareDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.FILESHARE, 1);
|
||||
if (rbc.isHomepageDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.HOMEPAGE, 1);
|
||||
if (rbc.isNetworkDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.NETWORK, 1);
|
||||
if (rbc.isNewsDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.NEWS, 1);
|
||||
if (rbc.isStatusDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.STATUS, 1);
|
||||
if (rbc.isSurftipsDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.SURFTIPS, 1);
|
||||
if (rbc.isWikiDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.WIKI, 1);
|
||||
|
||||
if (rbc.isLockedDisallowed() || rbc.isDirsDisallowed()) {
|
||||
final ArrayList[] p = getFiles(env.getConfig(plasmaSwitchboard.HTROOT_PATH, plasmaSwitchboard.HTROOT_PATH_DEFAULT));
|
||||
if (rbc.isLockedDisallowed()) {
|
||||
prop.put(httpdRobotsTxtConfig.LOCKED, p[0].size());
|
||||
for (int i=0; i<p[0].size(); i++)
|
||||
prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.LOCKED + "_" + i + "_page", p[0].get(i));
|
||||
}
|
||||
if (rbc.isDirsDisallowed()) {
|
||||
prop.put(httpdRobotsTxtConfig.DIRS, p[1].size());
|
||||
for (int i=0; i<p[1].size(); i++)
|
||||
prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.DIRS + "_" + i + "_dir", p[1].get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return prop;
|
||||
}
|
||||
|
||||
private static ArrayList[] getFiles(String htrootPath) {
|
||||
final File htroot = new File(htrootPath);
|
||||
if (!htroot.exists()) return null;
|
||||
final ArrayList htrootFiles = new ArrayList();
|
||||
final ArrayList htrootDirs = new ArrayList();
|
||||
final String[] htroots = htroot.list();
|
||||
File file;
|
||||
for (int i=0, dot; i<htroots.length; i++) {
|
||||
if (htroots[i].equals("www")) continue;
|
||||
file = new File(htroot, htroots[i]);
|
||||
if (file.isDirectory()) {
|
||||
htrootDirs.add("/" + file.getName());
|
||||
} else if (
|
||||
(dot = htroots[i].lastIndexOf('.')) < 2 ||
|
||||
htroots[i].charAt(dot - 2) == '_' && htroots[i].charAt(dot - 1) == 'p'
|
||||
) {
|
||||
htrootFiles.add("/" + file.getName());
|
||||
}
|
||||
}
|
||||
return new ArrayList[] { htrootFiles, htrootDirs };
|
||||
}
|
||||
}
|
54
htroot/robots.txt
Normal file
54
htroot/robots.txt
Normal file
|
@ -0,0 +1,54 @@
|
|||
# robots.txt for #[clientname]#.yacy
|
||||
|
||||
User-agent: *
|
||||
|
||||
#(all)#
|
||||
|
||||
#{dirs}#
|
||||
# dirs
|
||||
Disallow: /#[dir]##{/dirs}#
|
||||
|
||||
#{locked}#
|
||||
# locked
|
||||
Disallow: /#[page]##{/locked}#
|
||||
|
||||
#(wiki)#::
|
||||
# wiki
|
||||
Disallow: /Wiki.html#(/wiki)#
|
||||
|
||||
#(blog)#::
|
||||
# blog
|
||||
Disallow: /Blog.html
|
||||
Disallow: /Blog.rss
|
||||
Disallow: /Blog.xml#(/blog)#
|
||||
|
||||
#(news)#::
|
||||
# news
|
||||
Disallow: /News.html#(/news)#
|
||||
|
||||
#(status)#::
|
||||
# status
|
||||
Disallow: /Status.html#(/status)#
|
||||
|
||||
#(network)#::
|
||||
# network
|
||||
Disallow: /Network.html
|
||||
Disallow: /Network.csv
|
||||
Disallow: /Network.xml#(/network)#
|
||||
|
||||
#(homepage)#::
|
||||
# homepage
|
||||
Disallow: /www#(/homepage)#
|
||||
|
||||
#(fileshare)#::
|
||||
# fileshare
|
||||
Disallow: /share#(/fileshare)#
|
||||
|
||||
#(surftips)#::
|
||||
# surftips
|
||||
Disallow: /Surftips.html#(/surftips)#
|
||||
|
||||
::
|
||||
# all
|
||||
Disallow: /
|
||||
#(/all)#
|
|
@ -592,7 +592,8 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
|
|||
path.endsWith("csv") ||
|
||||
path.endsWith("pac") ||
|
||||
path.endsWith("src") ||
|
||||
path.endsWith("/")) {
|
||||
path.endsWith("/") ||
|
||||
path.equals("/robots.txt")) {
|
||||
|
||||
/*targetFile = getLocalizedFile(path);
|
||||
if (!(targetFile.exists())) {
|
||||
|
|
220
source/de/anomic/http/httpdRobotsTxtConfig.java
Normal file
220
source/de/anomic/http/httpdRobotsTxtConfig.java
Normal file
|
@ -0,0 +1,220 @@
|
|||
// httpdRobotsTxtConfig.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@anomic.de
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brauße
|
||||
//
|
||||
// $LastChangedDate: $
|
||||
// $LastChangedRevision: $
|
||||
// $LastChangedBy: $
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
//
|
||||
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||
// running) means that you agree that the Author(s) is (are) not responsible
|
||||
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||
// by usage of this software or this documentation. The usage of this software
|
||||
// is on your own risk. The installation and usage (starting/running) of this
|
||||
// software may allow other people or application to access your computer and
|
||||
// any attached devices and is highly dependent on the configuration of the
|
||||
// software which must be done by the user of the software; the author(s) is
|
||||
// (are) also not responsible for proper configuration and usage of the
|
||||
// software, even if provoked by documentation provided together with
|
||||
// the software.
|
||||
//
|
||||
// Any changes to this file according to the GPL as documented in the file
|
||||
// gpl.txt aside this file in the shipment you received can be done to the
|
||||
// lines that follows this copyright notice here, but changes must not be
|
||||
// done inside the copyright notice above. A re-distribution must contain
|
||||
// the intact and unchanged copyright notice.
|
||||
// Contributions and changes to the program code must be marked as such.
|
||||
|
||||
package de.anomic.http;
|
||||
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverSwitch;
|
||||
|
||||
public final class httpdRobotsTxtConfig {
|
||||
|
||||
public static final String WIKI = "wiki";
|
||||
public static final String BLOG = "blog";
|
||||
public static final String BOOKMARKS = "bookmarks";
|
||||
public static final String HOMEPAGE = "homepage";
|
||||
public static final String FILESHARE = "fileshare";
|
||||
public static final String SURFTIPS = "surftips";
|
||||
public static final String NEWS = "news";
|
||||
public static final String STATUS = "status";
|
||||
public static final String LOCKED = "locked";
|
||||
public static final String DIRS = "dirs";
|
||||
public static final String NETWORK = "network";
|
||||
public static final String ALL = "all";
|
||||
|
||||
private boolean allDisallowed = false;
|
||||
private boolean lockedDisallowed = true;
|
||||
private boolean dirsDisallowed = true;
|
||||
private boolean wikiDisallowed = false;
|
||||
private boolean blogDisallowed = false;
|
||||
private boolean fileshareDisallowed = false;
|
||||
private boolean homepageDisallowed = false;
|
||||
private boolean newsDisallowed = false;
|
||||
private boolean statusDisallowed = false;
|
||||
private boolean networkDisallowed = false;
|
||||
private boolean surftipsDisallowed = false;
|
||||
private boolean bookmarksDisallowed = false;
|
||||
|
||||
public httpdRobotsTxtConfig() { }
|
||||
|
||||
public httpdRobotsTxtConfig(String[] active) {
|
||||
if (active == null) return;
|
||||
for (int i=0; i<active.length; i++) {
|
||||
if (active[i] == null) continue;
|
||||
if (active[i].equals(BLOG)) { this.blogDisallowed = true; continue; }
|
||||
if (active[i].equals(WIKI)) { this.wikiDisallowed = true; continue; }
|
||||
if (active[i].equals(BOOKMARKS)) { this.bookmarksDisallowed = true; continue; }
|
||||
if (active[i].equals(HOMEPAGE)) { this.homepageDisallowed = true; continue; }
|
||||
if (active[i].equals(FILESHARE)) { this.fileshareDisallowed = true; continue; }
|
||||
if (active[i].equals(SURFTIPS)) { this.surftipsDisallowed = true; continue; }
|
||||
if (active[i].equals(NEWS)) { this.newsDisallowed = true; continue; }
|
||||
if (active[i].equals(STATUS)) { this.statusDisallowed = true; continue; }
|
||||
if (active[i].equals(NETWORK)) { this.networkDisallowed = true; continue; }
|
||||
if (active[i].equals(LOCKED)) { this.lockedDisallowed = true; continue; }
|
||||
if (active[i].equals(DIRS)) { this.dirsDisallowed = true; continue; }
|
||||
if (active[i].equals(ALL)) { this.allDisallowed = true; continue; }
|
||||
}
|
||||
}
|
||||
|
||||
public static httpdRobotsTxtConfig init(serverSwitch env) {
|
||||
String cfg = env.getConfig(plasmaSwitchboard.ROBOTS_TXT, plasmaSwitchboard.ROBOTS_TXT_DEFAULT);
|
||||
if (cfg == null) return new httpdRobotsTxtConfig();
|
||||
return new httpdRobotsTxtConfig(cfg.split(","));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
if (this.allDisallowed) return ALL;
|
||||
StringBuffer sb = new StringBuffer();
|
||||
if (this.blogDisallowed) sb.append(BLOG).append(",");
|
||||
if (this.bookmarksDisallowed) sb.append(BOOKMARKS).append(",");
|
||||
if (this.dirsDisallowed) sb.append(DIRS).append(",");
|
||||
if (this.fileshareDisallowed) sb.append(FILESHARE).append(",");
|
||||
if (this.homepageDisallowed) sb.append(HOMEPAGE).append(",");
|
||||
if (this.lockedDisallowed) sb.append(LOCKED).append(",");
|
||||
if (this.networkDisallowed) sb.append(NETWORK).append(",");
|
||||
if (this.newsDisallowed) sb.append(NEWS).append(",");
|
||||
if (this.statusDisallowed) sb.append(STATUS).append(",");
|
||||
if (this.surftipsDisallowed) sb.append(SURFTIPS).append(",");
|
||||
if (this.wikiDisallowed) sb.append(WIKI).append(",");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public boolean isAllDisallowed() {
|
||||
return allDisallowed;
|
||||
}
|
||||
|
||||
public void setAllDisallowed(boolean allDisallowed) {
|
||||
this.allDisallowed = allDisallowed;
|
||||
}
|
||||
|
||||
public boolean isLockedDisallowed() {
|
||||
return lockedDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setLockedDisallowed(boolean lockedDisallowed) {
|
||||
this.lockedDisallowed = lockedDisallowed;
|
||||
}
|
||||
|
||||
public boolean isDirsDisallowed() {
|
||||
return dirsDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setDirsDisallowed(boolean dirsDisallowed) {
|
||||
this.dirsDisallowed = dirsDisallowed;
|
||||
}
|
||||
|
||||
public boolean isBlogDisallowed() {
|
||||
return blogDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setBlogDisallowed(boolean blogDisallowed) {
|
||||
this.blogDisallowed = blogDisallowed;
|
||||
}
|
||||
|
||||
public boolean isBookmarksDisallowed() {
|
||||
return bookmarksDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setBookmarksDisallowed(boolean bookmarksDisallowed) {
|
||||
this.bookmarksDisallowed = bookmarksDisallowed;
|
||||
}
|
||||
|
||||
public boolean isFileshareDisallowed() {
|
||||
return fileshareDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setFileshareDisallowed(boolean fileshareDisallowed) {
|
||||
this.fileshareDisallowed = fileshareDisallowed;
|
||||
}
|
||||
|
||||
public boolean isHomepageDisallowed() {
|
||||
return homepageDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setHomepageDisallowed(boolean homepageDisallowed) {
|
||||
this.homepageDisallowed = homepageDisallowed;
|
||||
}
|
||||
|
||||
public boolean isNetworkDisallowed() {
|
||||
return networkDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setNetworkDisallowed(boolean networkDisallowed) {
|
||||
this.networkDisallowed = networkDisallowed;
|
||||
}
|
||||
|
||||
public boolean isNewsDisallowed() {
|
||||
return newsDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setNewsDisallowed(boolean newsDisallowed) {
|
||||
this.newsDisallowed = newsDisallowed;
|
||||
}
|
||||
|
||||
public boolean isStatusDisallowed() {
|
||||
return statusDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setStatusDisallowed(boolean statusDisallowed) {
|
||||
this.statusDisallowed = statusDisallowed;
|
||||
}
|
||||
|
||||
public boolean isSurftipsDisallowed() {
|
||||
return surftipsDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setSurftipsDisallowed(boolean surftipsDisallowed) {
|
||||
this.surftipsDisallowed = surftipsDisallowed;
|
||||
}
|
||||
|
||||
public boolean isWikiDisallowed() {
|
||||
return wikiDisallowed || this.allDisallowed;
|
||||
}
|
||||
|
||||
public void setWikiDisallowed(boolean wikiDisallowed) {
|
||||
this.wikiDisallowed = wikiDisallowed;
|
||||
}
|
||||
}
|
|
@ -132,6 +132,7 @@ import de.anomic.http.httpHeader;
|
|||
import de.anomic.http.httpRemoteProxyConfig;
|
||||
import de.anomic.http.httpc;
|
||||
import de.anomic.http.httpd;
|
||||
import de.anomic.http.httpdRobotsTxtConfig;
|
||||
import de.anomic.index.indexContainer;
|
||||
import de.anomic.index.indexRWIEntry;
|
||||
import de.anomic.index.indexRWIEntryNew;
|
||||
|
@ -262,6 +263,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
// public String[] remoteProxyNoProxyPatterns = null;
|
||||
public httpRemoteProxyConfig remoteProxyConfig = null;
|
||||
|
||||
public httpdRobotsTxtConfig robotstxtConfig = null;
|
||||
|
||||
|
||||
/*
|
||||
* Some constants
|
||||
|
@ -652,6 +655,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
|
||||
public static final String HTTPC_NAME_CACHE_CACHING_PATTERNS_NO = "httpc.nameCacheNoCachingPatterns";
|
||||
|
||||
public static final String ROBOTS_TXT = "httpd.robots.txt";
|
||||
public static final String ROBOTS_TXT_DEFAULT = httpdRobotsTxtConfig.LOCKED + " " + httpdRobotsTxtConfig.DIRS;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lists
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -875,6 +881,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
this.remoteProxyConfig = httpRemoteProxyConfig.init(this);
|
||||
this.log.logConfig("Remote proxy configuration:\n" + this.remoteProxyConfig.toString());
|
||||
|
||||
// set up local robots.txt
|
||||
this.robotstxtConfig = httpdRobotsTxtConfig.init(this);
|
||||
|
||||
// setting timestamp of last proxy access
|
||||
this.proxyLastAccess = System.currentTimeMillis() - 60000;
|
||||
crg = new StringBuffer(maxCRGDump);
|
||||
|
|
16
yacy.init
16
yacy.init
|
@ -869,3 +869,19 @@ rankingProfile =
|
|||
#optional extern thumbnail program.
|
||||
#the program must accept the invocation PROGRAM http://url /path/to/filename
|
||||
thumbnailProgram =
|
||||
|
||||
# settings for the peer's local robots.txt
|
||||
# the following restrictions are possible (comma-separated):
|
||||
# - all : entire domain is disallowed
|
||||
# - blog : the blog-pages
|
||||
# - bookmarks : the bookmark-page
|
||||
# - dirs : all directories in htroot (standard setting, as there is no usable information in them)
|
||||
# - fileshare : all files in the peer's file share (DATA/HTDOCS/share)
|
||||
# - homepage : all files on the peer's home page (DATA/HTDOCS/www)
|
||||
# - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways)
|
||||
# - news : the news-page
|
||||
# - network : the network-pages
|
||||
# - status : peer's status page
|
||||
# - surftips : the surftips-page
|
||||
# - wiki : the wiki-page
|
||||
httpd.robots.txt = locked,dirs
|
||||
|
|
Loading…
Reference in New Issue
Block a user