move cutUrlText to nxTools,

max length of URL names (titles) on the search page is now 120 chars


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1060 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
borg-0300 2005-11-11 13:40:53 +00:00
parent 9158845c3b
commit 5778428455
3 changed files with 47 additions and 41 deletions

View File

@ -68,6 +68,7 @@ import de.anomic.server.serverCodings;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -301,39 +302,6 @@ public final class plasmaCrawlLURL extends plasmaURL {
}
}
/**
 * Shortens a URL-like string to at most <code>len</code> characters.<br>
 *
 * Example returns:<br>
 * <dl><dt>normal domain:</dt><dd>http://domain.net/leftpath..rightpath</dd>
 * <dt>long domain:</dt><dd>http://very_very_long_domain.net/le..</dd></dl>
 *
 * Strings that are not longer than <code>len</code>, or that do not contain
 * <code>"://"</code>, are returned unchanged.
 *
 * @param url the string to shorten, expected to look like a URL; may be <code>null</code>
 * @param len the maximum length of the returned string; if it is smaller than 2
 *            there is no room for the <code>".."</code> marker and the string is
 *            simply truncated
 * @return the shortened string, the unchanged string, or <code>null</code>
 *         if <code>url</code> is <code>null</code>
 */
public static String cutUrlText(String url, int len) {
    // This is contributed by Thomas Quella (borg-0300)
    if (url == null) { return null; } // nothing to shorten
    int la = url.length();
    if (la > len) {
        int cpos = url.indexOf("://");
        if (cpos >= 0) {
            // position of the first slash after the host part
            cpos = url.indexOf("/", cpos + 3);
            if (cpos >= 0 && cpos < len - (len / 3)) { // at least 1/3 characters for the path
                // lb = number of path characters kept on each side of the ".." marker
                final int lb = ((len - cpos) / 2) - 1;
                if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1
                url = url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb));
            } else if (len >= 2) {
                // no slash at end, very crazy domain or very short len: keep the head only
                url = url.substring(0, len - 2).concat("..");
            } else {
                // len is too small for the ".." marker; a negative len yields ""
                url = url.substring(0, Math.max(len, 0));
            }
        } // NO URL !?
    } // URL < len
    return url;
}
public serverObjects genTableProps(int tabletype, int lines, boolean showInit, boolean showExec, String dfltInit, String dfltExec, String feedbackpage, boolean makeLink) {
serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps tabletype=" + tabletype + " lines=" + lines +
" showInit=" + showInit + " showExec=" + showExec +
@ -387,7 +355,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps Remove ':80' URL=" + txt);
}
txt = cutUrlText(txt, 72); // shorten the string text like a URL
txt = nxTools.cutUrlText(txt, 72); // shorten the string text like a URL
cachepath = (urle.url() == null) ? "-not-cached-" : url.substring(url.indexOf("://") + 3);
if (cachepath.endsWith("/")) cachepath = cachepath + "ndx";

View File

@ -5,9 +5,9 @@
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
// $LastChangedDate:$
// $LastChangedRevision:$
// $LastChangedBy:$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -139,6 +139,7 @@ import de.anomic.server.serverFileUtils;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -607,7 +608,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
(getConfig(STR_REMOTEPROFILE, "").length() == 0) ||
(profiles.getEntry(getConfig(STR_REMOTEPROFILE, "")) == null)) {
// generate new default entry for remote crawling
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
// defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, true, true, true, true, true, true, false);
setConfig(STR_REMOTEPROFILE, defaultRemoteProfile.handle());
} else {
defaultRemoteProfile = profiles.getEntry(getConfig(STR_REMOTEPROFILE, ""));
@ -1657,7 +1659,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
prop.put("results_" + i + "_description", descr);
prop.put("results_" + i + "_url", urlstring);
prop.put("results_" + i + "_urlhash", urlhash);
prop.put("results_" + i + "_urlname", urlname);
prop.put("results_" + i + "_urlname", nxTools.cutUrlText(urlname, 120));
prop.put("results_" + i + "_date", dateString(urlentry.moddate()));
prop.put("results_" + i + "_size", Long.toString(urlentry.size()));
prop.put("results_" + i + "_words",URLEncoder.encode(query.queryWords.toString(),"UTF-8"));
@ -1667,7 +1669,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
prop.put("results_" + i + "_snippet_text", "");
} else {
prop.put("results_" + i + "_snippet", 1);
prop.put("results_" + i + "_snippet_text", snippet.line.toString().trim());
prop.put("results_" + i + "_snippet_text", snippet.line.trim());
}
i++;
}

View File

@ -3,7 +3,10 @@
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 04.05.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -141,4 +144,37 @@ public class nxTools {
return null;
}
/**
 * Shortens a URL string to at most <code>len</code> characters.<br>
 *
 * Example returns:<br>
 * <dl><dt>normal domain:</dt><dd>http://domain.net/leftpath..rightpath</dd>
 * <dt>long domain:</dt><dd>http://very_very_long_domain.net/le..</dd></dl>
 *
 * Strings that are not longer than <code>len</code>, or that do not contain
 * <code>"://"</code>, are returned unchanged.
 *
 * @param url the string to shorten, expected to look like a URL; may be <code>null</code>
 * @param len the maximum length of the returned string; if it is smaller than 2
 *            there is no room for the <code>".."</code> marker and the string is
 *            simply truncated
 * @return the shortened string, the unchanged string, or <code>null</code>
 *         if <code>url</code> is <code>null</code>
 */
public static String cutUrlText(String url, int len) {
    // This is contributed by Thomas Quella (borg-0300)
    if (url == null) { return null; } // nothing to shorten
    int la = url.length();
    if (la <= len) { return url; } // already short enough

    final int scheme = url.indexOf("://");
    if (scheme < 0) { return url; } // NO URL !? -> left untouched

    // position of the first slash after the host part
    final int cpos = url.indexOf("/", scheme + 3);
    if (cpos >= 0 && cpos < len - (len / 3)) { // at least 1/3 characters for the path
        // lb = number of path characters kept on each side of the ".." marker
        final int lb = ((len - cpos) / 2) - 1;
        if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1
        return url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb));
    }

    // no slash at end, very crazy domain or very short len: keep the head only
    if (len < 2) {
        // len is too small for the ".." marker; a negative len yields ""
        return url.substring(0, Math.max(len, 0));
    }
    return url.substring(0, len - 2).concat("..");
}
}