From eeba8b055e972f7c49d0e36024a632364bd77f85 Mon Sep 17 00:00:00 2001 From: theli Date: Tue, 14 Feb 2006 09:55:09 +0000 Subject: [PATCH] *) guessing, testing and suggesting alternative hostnames on "unknown host" error See: http://www.yacy-forum.de/viewtopic.php?t=1879 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1636 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/proxymsg/unknownHost.inc | 19 +++ source/de/anomic/data/robotsParser.java | 5 + source/de/anomic/http/httpd.java | 6 +- source/de/anomic/http/httpdProxyHandler.java | 127 ++++++++++++++++++- 4 files changed, 150 insertions(+), 7 deletions(-) create mode 100644 htroot/proxymsg/unknownHost.inc diff --git a/htroot/proxymsg/unknownHost.inc b/htroot/proxymsg/unknownHost.inc new file mode 100644 index 000000000..0676b7f82 --- /dev/null +++ b/htroot/proxymsg/unknownHost.inc @@ -0,0 +1,19 @@ + +
+

The server #[hostName]# could not be found.

+Did you mean: + + #{list}# + + + + #{/list}# +
#[hostName]#
+ diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java index 2948c5b2b..1a8a9d541 100644 --- a/source/de/anomic/data/robotsParser.java +++ b/source/de/anomic/data/robotsParser.java @@ -329,6 +329,11 @@ public final class robotsParser{ } else if (res.status.startsWith("3")) { // getting redirection URL String redirectionUrlString = (String) res.responseHeader.get(httpHeader.LOCATION); + if (redirectionUrlString==null) { + serverLog.logFinest("ROBOTS","robots.txt could not be downloaded from URL '" + robotsURL + "' because of missing redirecton header. [" + res.status + "]."); + robotsTxt = null; + } + redirectionUrlString = redirectionUrlString.trim(); // generating the new URL object diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index 787e5e30d..845c1716d 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -322,7 +322,7 @@ public final class httpd implements serverHandler { if(entry.canSurf()){ return true; } - HashMap tp=new HashMap(); + serverObjects tp=new serverObjects(); tp.put("limit", "0");//time per day tp.put("limit_timelimit", entry.getTimeLimit()); sendRespondError(this.prop, this.session.out, 403, "Internet-Timelimit reached", new File("proxymsg/proxylimits.inc"), tp, null); @@ -993,7 +993,7 @@ public final class httpd implements serverHandler { int httpStatusCode, String httpStatusText, File detailedErrorMsgFile, - HashMap detailedErrorMsgValues, + serverObjects detailedErrorMsgValues, Exception stackTrace ) throws IOException { sendRespondError( @@ -1017,7 +1017,7 @@ public final class httpd implements serverHandler { String httpStatusText, String detailedErrorMsgText, Object detailedErrorMsgFile, - HashMap detailedErrorMsgValues, + serverObjects detailedErrorMsgValues, Exception stackTrace ) throws IOException { diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 
6d5fed8e6..a0530bf64 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -71,14 +71,18 @@ import java.io.PrintWriter; import java.io.PushbackInputStream; import java.net.BindException; import java.net.ConnectException; +import java.net.InetAddress; import java.net.MalformedURLException; import java.net.NoRouteToHostException; import java.net.Socket; import java.net.SocketTimeoutException; import java.net.URL; import java.net.UnknownHostException; +import java.util.Arrays; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.Properties; import java.util.logging.FileHandler; import java.util.logging.Level; @@ -94,6 +98,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaURL; import de.anomic.server.serverCore; import de.anomic.server.serverFileUtils; +import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverMiniLogFormatter; @@ -1287,6 +1292,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt Exception errorExc = null; boolean unknownError = false; + // for customized error messages + boolean detailedErrorMsg = false; + String detailedErrorMsgFile = null; + serverObjects detailedErrorMsgMap = null; + if (e instanceof ConnectException) { httpStatusCode = 403; httpStatusText = "Connection refused"; errorMessage = "Connection refused by destination host"; @@ -1295,7 +1305,15 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt } else if (e instanceof NoRouteToHostException) { errorMessage = "No route to destination host"; } else if (e instanceof UnknownHostException) { - errorMessage = "IP address of the destination host could not be determined"; + //errorMessage = "IP address of the destination host could not be determined"; + try { + detailedErrorMsgMap 
= unknownHostHandling(conProp); + httpStatusText = "Unknown Host"; + detailedErrorMsg = true; + detailedErrorMsgFile = "proxymsg/unknownHost.inc"; + } catch (Exception e1) { + errorMessage = "IP address of the destination host could not be determined"; + } } else if (e instanceof SocketTimeoutException) { errorMessage = "Unable to establish a connection to the destination host. Connect timed out."; } else { @@ -1306,13 +1324,21 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt this.forceConnectionClose(); } else if ((exceptionMsg != null) && (exceptionMsg.indexOf("Connection reset")>= 0)) { errorMessage = "Connection reset"; + } else if ((exceptionMsg != null) && (exceptionMsg.indexOf("unknown host")>=0)) { + try { + detailedErrorMsgMap = unknownHostHandling(conProp); + httpStatusText = "Unknown Host"; + detailedErrorMsg = true; + detailedErrorMsgFile = "proxymsg/unknownHost.inc"; + } catch (Exception e1) { + errorMessage = "IP address of the destination host could not be determined"; + } } else if ((exceptionMsg != null) && ( - (exceptionMsg.indexOf("unknown host")>=0) || (exceptionMsg.indexOf("socket write error")>=0) || (exceptionMsg.indexOf("Read timed out") >= 0) || (exceptionMsg.indexOf("Broken pipe") >= 0) - )) { + )) { errorMessage = exceptionMsg; } else if ((remote != null)&&(remote.isClosed())) { // TODO: query for broken pipe @@ -1326,7 +1352,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt // sending back an error message to the client if (!conProp.containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { - httpd.sendRespondError(conProp,respond,4,httpStatusCode,httpStatusText,errorMessage,errorExc); + if (detailedErrorMsg) { + httpd.sendRespondError(conProp,respond, httpStatusCode, httpStatusText, new File(detailedErrorMsgFile), detailedErrorMsgMap, errorExc); + } else { + httpd.sendRespondError(conProp,respond,4,httpStatusCode,httpStatusText,errorMessage,errorExc); + } } 
else {
                 if (unknownError) {
                     this.theLogger.logFine("Error while processing request '" +
@@ -1347,6 +1377,124 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
 
     }
 
+    /**
+     * Collects the values for the "unknown host" error page
+     * (<code>proxymsg/unknownHost.inc</code>): the host name that could not be
+     * resolved and a list of similar host names that do resolve.
+     *
+     * <p>Candidates are built by adding or removing a leading <code>www.</code>,
+     * by inserting the missing dot after a leading <code>www</code>, and by
+     * replacing the last domain label with each entry of a fixed list of
+     * top-level domains. A candidate is suggested only if
+     * <code>httpc.dnsResolve</code> returns an address for it.</p>
+     *
+     * @param conProp connection properties of the failed request; host
+     *        (optionally with <code>:port</code>), path and arguments are read from it
+     * @return the values for the template: <code>hostName</code>, one set of
+     *         <code>list_N_hostName</code>, <code>list_N_hostPort</code>,
+     *         <code>list_N_hostPath</code>, <code>list_N_hostArgs</code> per
+     *         suggestion, and the number of suggestions as <code>list</code>
+     * @throws Exception if the suggestions cannot be built; the caller then
+     *         falls back to its plain-text error message
+     */
+    private serverObjects unknownHostHandling(Properties conProp) throws Exception {
+        serverObjects detailedErrorMsgMap = new serverObjects();
+
+        // generic top-level domains
+        HashSet topLevelDomains = new HashSet(Arrays.asList(new String[]{
+                "aero",     // airlines / aviation
+                "arpa",     // ARPANET infrastructure
+                "biz",      // business
+                "com",      // commercial
+                "coop",     // cooperatives
+                "edu",      // education
+                "gov",      // government
+                "info",     // information services
+                "int",      // international
+                "jobs",     // company job offers
+                "mil",      // military (US)
+                // "museum",   // museums
+                "name",     // individuals
+                "nato",     // NATO (obsolete)
+                "net",      // network operators
+                "org",      // non-commercial organizations
+                "pro",      // professionals
+                "travel",   // travel industry
+
+                // some country TLDs
+                "de",
+                "at",
+                "ch",
+                "it",
+                "uk"
+        }));
+
+        // getting some connection properties
+        String orgHostPort = "80";
+        // fixed locale: the default locale must not influence how the host name is lower-cased
+        String orgHostName = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST,"unknown").toLowerCase(java.util.Locale.ENGLISH);
+        int pos = orgHostName.indexOf(":");
+        if (pos != -1) {
+            orgHostPort = orgHostName.substring(pos+1);
+            orgHostName = orgHostName.substring(0,pos);
+        }
+        String orgHostPath = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH,"");
+        String orgHostArgs = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS,"");
+        if (orgHostArgs.length() > 0) orgHostArgs = "?" + orgHostArgs;
+        detailedErrorMsgMap.put("hostName", orgHostName);
+
+        // guessing hostnames
+        HashSet testHostNames = new HashSet();
+        if (orgHostName.startsWith("www.")) {
+            // "www.example.com" -> "example.com"
+            addIfResolvable(testHostNames, orgHostName.substring(4), orgHostName);
+        } else {
+            // "example.com" -> "www.example.com"
+            addIfResolvable(testHostNames, "www." + orgHostName, orgHostName);
+        }
+        if (orgHostName.length()>4 && orgHostName.startsWith("www") && (orgHostName.charAt(3) != '.')) {
+            // "wwwexample.com" -> "www.example.com"
+            addIfResolvable(testHostNames, orgHostName.substring(0,3) + "." + orgHostName.substring(3), orgHostName);
+        }
+
+        // same host below every known top-level domain
+        pos = orgHostName.lastIndexOf(".");
+        if (pos != -1) {
+            String hostWithoutTld = orgHostName.substring(0,pos);
+            Iterator tldIter = topLevelDomains.iterator();
+            while (tldIter.hasNext()) {
+                String topLevelDomain = (String) tldIter.next();
+                addIfResolvable(testHostNames, hostWithoutTld + "." + topLevelDomain, orgHostName);
+            }
+        }
+
+        // one numbered entry per suggestion
+        int hostNameCount = 0;
+        Iterator iter = testHostNames.iterator();
+        while (iter.hasNext()) {
+            String testHostName = (String) iter.next();
+            detailedErrorMsgMap.put("list_" + hostNameCount + "_hostName",testHostName);
+            detailedErrorMsgMap.put("list_" + hostNameCount + "_hostPort",orgHostPort);
+            detailedErrorMsgMap.put("list_" + hostNameCount + "_hostPath",orgHostPath);
+            detailedErrorMsgMap.put("list_" + hostNameCount + "_hostArgs",orgHostArgs);
+            hostNameCount++;
+        }
+
+        detailedErrorMsgMap.put("list", hostNameCount);
+        return detailedErrorMsgMap;
+    }
+
+    /**
+     * Adds <code>candidate</code> to <code>hostNames</code> if it is not empty,
+     * differs from the host that just failed to resolve, and
+     * <code>httpc.dnsResolve</code> returns an address for it.
+     */
+    private void addIfResolvable(HashSet hostNames, String candidate, String failedHost) throws Exception {
+        if (candidate.length() == 0 || candidate.equals(failedHost)) return;
+        InetAddress addr = httpc.dnsResolve(candidate);
+        if (addr != null) hostNames.add(candidate);
+    }
+
     private String generateUserAgent(httpHeader requestHeaders) {
         this.userAgentStr.setLength(0);
 