From 5778428455bfa0800b6a3af31429861c329361bb Mon Sep 17 00:00:00 2001 From: borg-0300 Date: Fri, 11 Nov 2005 13:40:53 +0000 Subject: [PATCH] move cutUrlText to nxTools, max length from URLs(title) on searchpage now 120 chars git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1060 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaCrawlLURL.java | 36 +----------------- .../de/anomic/plasma/plasmaSwitchboard.java | 14 ++++--- source/de/anomic/tools/nxTools.java | 38 ++++++++++++++++++- 3 files changed, 47 insertions(+), 41 deletions(-) diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 09c064666..87f577aea 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -68,6 +68,7 @@ import de.anomic.server.serverCodings; import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.tools.crypt; +import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; @@ -301,39 +302,6 @@ public final class plasmaCrawlLURL extends plasmaURL { } } - /** - * This function shortens the String url
- * - * Example returns:
- *
normal domain:
http://domain.net/leftpath..rightpath
- *
long domain:
http://very_very_long_domain.net/le..
- * @param String like a URL - * @return the shorten or the old String - */ - public static String cutUrlText(String url, int len) { - // This is contributed by Thomas Quella (borg-0300) - int la = url.length(); - if (la > len) { - int cpos; - cpos = url.indexOf("://"); - if (cpos >= 0) { - cpos = url.indexOf("/", cpos + 3); - if (cpos >= 0) { - if (cpos < len-(len / 3)) { // at least 1/3 characters for the path - final int lb = ((len - cpos) / 2) - 1; - if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1 - url = url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb)); - } else { - url = url.substring(0, len - 2).concat(".."); - } - } else { // very crazy domain or very short len - url = url.substring(0, len - 2).concat(".."); - } // no slash at end - } // NO URL !? - } // URL < len - return url; - } - public serverObjects genTableProps(int tabletype, int lines, boolean showInit, boolean showExec, String dfltInit, String dfltExec, String feedbackpage, boolean makeLink) { serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps tabletype=" + tabletype + " lines=" + lines + " showInit=" + showInit + " showExec=" + showExec + @@ -387,7 +355,7 @@ public final class plasmaCrawlLURL extends plasmaURL { serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps Remove ':80' URL=" + txt); } - txt = cutUrlText(txt, 72); // shorten the string text like a URL + txt = nxTools.cutUrlText(txt, 72); // shorten the string text like a URL cachepath = (urle.url() == null) ? "-not-cached-" : url.substring(url.indexOf("://") + 3); if (cachepath.endsWith("/")) cachepath = cachepath + "ndx"; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 98412776d..fd531d73a 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -5,9 +5,9 @@ // first published on http://www.anomic.de // Frankfurt, Germany, 2004, 2005 // -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ +// $LastChangedDate:$ +// $LastChangedRevision:$ +// $LastChangedBy:$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -139,6 +139,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; import de.anomic.tools.bitfield; import de.anomic.tools.crypt; +import de.anomic.tools.nxTools; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; @@ -607,7 +608,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser (getConfig(STR_REMOTEPROFILE, "").length() == 0) || (profiles.getEntry(getConfig(STR_REMOTEPROFILE, "")) == null)) { // generate new default entry for remote crawling - defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false); +// defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false); + defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, true, true, true, true, true, true, false); setConfig(STR_REMOTEPROFILE, defaultRemoteProfile.handle()); } else { defaultRemoteProfile = profiles.getEntry(getConfig(STR_REMOTEPROFILE, "")); @@ -1657,7 +1659,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser prop.put("results_" + i + "_description", descr); prop.put("results_" + i + "_url", urlstring); prop.put("results_" + i + "_urlhash", urlhash); - prop.put("results_" + i + "_urlname", urlname); + prop.put("results_" + i + "_urlname", nxTools.cutUrlText(urlname, 120)); prop.put("results_" + i + "_date", dateString(urlentry.moddate())); prop.put("results_" + i + "_size", Long.toString(urlentry.size())); prop.put("results_" + i + "_words",URLEncoder.encode(query.queryWords.toString(),"UTF-8")); @@ -1667,7 +1669,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser prop.put("results_" + i + "_snippet_text", ""); } else { prop.put("results_" + i + "_snippet", 1); - prop.put("results_" + i + "_snippet_text", snippet.line.toString().trim()); + prop.put("results_" + i + "_snippet_text", snippet.line.trim()); } i++; } diff --git a/source/de/anomic/tools/nxTools.java b/source/de/anomic/tools/nxTools.java index 283827d83..e14c15810 100644 --- a/source/de/anomic/tools/nxTools.java +++ b/source/de/anomic/tools/nxTools.java @@ -3,7 +3,10 @@ // (C) by Michael Peter Christen; mc@anomic.de // first published on http://www.anomic.de // Frankfurt, Germany, 2004 -// last major change: 04.05.2004 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -141,4 +144,37 @@ public class nxTools { return null; } + /** + * This function shorten URL Strings
+ * + * Example returns:
+ *
normal domain:
http://domain.net/leftpath..rightpath
+ *
long domain:
http://very_very_long_domain.net/le..
+ * @param String like a URL + * @return the shorten or the old String + */ + public static String cutUrlText(String url, int len) { + // This is contributed by Thomas Quella (borg-0300) + int la = url.length(); + if (la > len) { + int cpos; + cpos = url.indexOf("://"); + if (cpos >= 0) { + cpos = url.indexOf("/", cpos + 3); + if (cpos >= 0) { + if (cpos < len-(len / 3)) { // at least 1/3 characters for the path + final int lb = ((len - cpos) / 2) - 1; + if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1 + return url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb)); + } else { + return url.substring(0, len - 2).concat(".."); + } + } else { // very crazy domain or very short len + return url.substring(0, len - 2).concat(".."); + } // no slash at end + } // NO URL !? + } // URL < len + return url; + } + }