mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-21 00:00:13 +02:00
Move cutUrlText to nxTools;
the maximum length of URLs (titles) on the search page is now 120 chars. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1060 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
9158845c3b
commit
5778428455
|
@ -68,6 +68,7 @@ import de.anomic.server.serverCodings;
|
|||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.tools.crypt;
|
||||
import de.anomic.tools.nxTools;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
|
||||
|
@ -301,39 +302,6 @@ public final class plasmaCrawlLURL extends plasmaURL {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This function shortens the String url<br>
|
||||
*
|
||||
* Example returns:<br>
|
||||
* <dl><dt>normal domain:</dt><dd>http://domain.net/leftpath..rightpath</dd>
|
||||
* <dt>long domain:</dt><dd>http://very_very_long_domain.net/le..</dd></dl>
|
||||
* @param String like a URL
|
||||
* @return the shorten or the old String
|
||||
*/
|
||||
public static String cutUrlText(String url, int len) {
|
||||
// This is contributed by Thomas Quella (borg-0300)
|
||||
int la = url.length();
|
||||
if (la > len) {
|
||||
int cpos;
|
||||
cpos = url.indexOf("://");
|
||||
if (cpos >= 0) {
|
||||
cpos = url.indexOf("/", cpos + 3);
|
||||
if (cpos >= 0) {
|
||||
if (cpos < len-(len / 3)) { // at least 1/3 characters for the path
|
||||
final int lb = ((len - cpos) / 2) - 1;
|
||||
if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1
|
||||
url = url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb));
|
||||
} else {
|
||||
url = url.substring(0, len - 2).concat("..");
|
||||
}
|
||||
} else { // very crazy domain or very short len
|
||||
url = url.substring(0, len - 2).concat("..");
|
||||
} // no slash at end
|
||||
} // NO URL !?
|
||||
} // URL < len
|
||||
return url;
|
||||
}
|
||||
|
||||
public serverObjects genTableProps(int tabletype, int lines, boolean showInit, boolean showExec, String dfltInit, String dfltExec, String feedbackpage, boolean makeLink) {
|
||||
serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps tabletype=" + tabletype + " lines=" + lines +
|
||||
" showInit=" + showInit + " showExec=" + showExec +
|
||||
|
@ -387,7 +355,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
|
|||
serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps Remove ':80' URL=" + txt);
|
||||
}
|
||||
|
||||
txt = cutUrlText(txt, 72); // shorten the string text like a URL
|
||||
txt = nxTools.cutUrlText(txt, 72); // shorten the string text like a URL
|
||||
|
||||
cachepath = (urle.url() == null) ? "-not-cached-" : url.substring(url.indexOf("://") + 3);
|
||||
if (cachepath.endsWith("/")) cachepath = cachepath + "ndx";
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2004, 2005
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
// $LastChangedDate:$
|
||||
// $LastChangedRevision:$
|
||||
// $LastChangedBy:$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
|
@ -139,6 +139,7 @@ import de.anomic.server.serverFileUtils;
|
|||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.tools.bitfield;
|
||||
import de.anomic.tools.crypt;
|
||||
import de.anomic.tools.nxTools;
|
||||
import de.anomic.yacy.yacyClient;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
|
@ -607,7 +608,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
(getConfig(STR_REMOTEPROFILE, "").length() == 0) ||
|
||||
(profiles.getEntry(getConfig(STR_REMOTEPROFILE, "")) == null)) {
|
||||
// generate new default entry for remote crawling
|
||||
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
|
||||
// defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
|
||||
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, true, true, true, true, true, true, false);
|
||||
setConfig(STR_REMOTEPROFILE, defaultRemoteProfile.handle());
|
||||
} else {
|
||||
defaultRemoteProfile = profiles.getEntry(getConfig(STR_REMOTEPROFILE, ""));
|
||||
|
@ -1657,7 +1659,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
prop.put("results_" + i + "_description", descr);
|
||||
prop.put("results_" + i + "_url", urlstring);
|
||||
prop.put("results_" + i + "_urlhash", urlhash);
|
||||
prop.put("results_" + i + "_urlname", urlname);
|
||||
prop.put("results_" + i + "_urlname", nxTools.cutUrlText(urlname, 120));
|
||||
prop.put("results_" + i + "_date", dateString(urlentry.moddate()));
|
||||
prop.put("results_" + i + "_size", Long.toString(urlentry.size()));
|
||||
prop.put("results_" + i + "_words",URLEncoder.encode(query.queryWords.toString(),"UTF-8"));
|
||||
|
@ -1667,7 +1669,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
prop.put("results_" + i + "_snippet_text", "");
|
||||
} else {
|
||||
prop.put("results_" + i + "_snippet", 1);
|
||||
prop.put("results_" + i + "_snippet_text", snippet.line.toString().trim());
|
||||
prop.put("results_" + i + "_snippet_text", snippet.line.trim());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
// (C) by Michael Peter Christen; mc@anomic.de
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2004
|
||||
// last major change: 04.05.2004
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
|
@ -141,4 +144,37 @@ public class nxTools {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function shorten URL Strings<br>
|
||||
*
|
||||
* Example returns:<br>
|
||||
* <dl><dt>normal domain:</dt><dd>http://domain.net/leftpath..rightpath</dd>
|
||||
* <dt>long domain:</dt><dd>http://very_very_long_domain.net/le..</dd></dl>
|
||||
* @param String like a URL
|
||||
* @return the shorten or the old String
|
||||
*/
|
||||
public static String cutUrlText(String url, int len) {
|
||||
// This is contributed by Thomas Quella (borg-0300)
|
||||
int la = url.length();
|
||||
if (la > len) {
|
||||
int cpos;
|
||||
cpos = url.indexOf("://");
|
||||
if (cpos >= 0) {
|
||||
cpos = url.indexOf("/", cpos + 3);
|
||||
if (cpos >= 0) {
|
||||
if (cpos < len-(len / 3)) { // at least 1/3 characters for the path
|
||||
final int lb = ((len - cpos) / 2) - 1;
|
||||
if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1
|
||||
return url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb));
|
||||
} else {
|
||||
return url.substring(0, len - 2).concat("..");
|
||||
}
|
||||
} else { // very crazy domain or very short len
|
||||
return url.substring(0, len - 2).concat("..");
|
||||
} // no slash at end
|
||||
} // NO URL !?
|
||||
} // URL < len
|
||||
return url;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user