added a file type navigator

added a protocol navigator

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7795 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2011-06-23 15:39:52 +00:00
parent 31283ecd07
commit 84c9658644
7 changed files with 134 additions and 24 deletions

View File

@ -740,7 +740,7 @@ search.result.show.pictures = true
# search navigators: comma-separated list of default values for search navigation.
# can be temporarily different if the search string is given with different navigation values
# assigning no value(s) means that no navigation is shown
search.navigation=hosts,authors,namespace,topics
search.navigation=hosts,authors,namespace,topics,filetype,protocol
# search result verification and snippet fetch caching rules
# each search result can be verified by loading the link from the web

View File

@ -91,9 +91,11 @@ $(function() {
header: "h3"
});
$("#sidebarDomains").accordion({});
$("#sidebarNameSpace").accordion({});
$("#sidebarProtocols").accordion({});
$("#sidebarFiletypes").accordion({});
$("#sidebarAuthors").accordion({});
$("#sidebarAuthors").accordion('activate', false);
$("#sidebarNameSpace").accordion({});
$("#sidebarTopics").tagcloud({type:"sphere",power:.25,seed:0,sizemin:10,sizemax:20,height:80,colormin:"682",colormax:"20C"}).find("li").tsort();
$("#sidebarAbout").accordion({});
$("#search").focus();

View File

@ -16,6 +16,24 @@
</div>
#(/nav-topics)#
#(nav-protocols)#::
<div id="sidebarProtocols" style="float: right; margin-top:5px; width: 220px;">
<h3 style="padding-left:25px;">Protocol Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
</div>
#(/nav-protocols)#
#(nav-filetypes)#::
<div id="sidebarFiletypes" style="float: right; margin-top:5px; width: 220px;">
<h3 style="padding-left:25px;">Filetype Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
</div>
#(/nav-filetypes)#
#(nav-domains)#::
<div id="sidebarDomains" style="float: right; margin-top:5px; width: 220px;">
<h3 style="padding-left:25px;">Domain Navigator</h3>

View File

@ -163,6 +163,58 @@ public class yacysearchtrailer {
prop.put("nav-topics_element_" + i + "_nl", 0);
}
// protocol navigators
// Fills the "nav-protocols" template properties from the search event's protocol
// navigator: a 0/1 switch on "nav-protocols" plus up to 20 "nav-protocols_element_<i>_*" entries.
final ScoreMap<String> protocolNavigator = theSearch.getProtocolNavigator();
if (protocolNavigator == null || protocolNavigator.isEmpty()) {
// nothing to offer: select alternative 0 of the nav-protocols template switch
prop.put("nav-protocols", 0);
} else {
prop.put("nav-protocols", 1);
// NOTE(review): keys(false) presumably iterates in descending score order - confirm against ScoreMap
navigatorIterator = protocolNavigator.keys(false);
int i = 0;
String pnav;
// the list is capped at 20 entries
while (i < 20 && navigatorIterator.hasNext()) {
// NOTE(review): the count is looked up with the trimmed key; this only matches if the stored key has no surrounding whitespace
name = navigatorIterator.next().trim();
count = protocolNavigator.get(name);
// query term that is appended (joined with "+") to the current query string to narrow the search to this protocol
pnav = "/" + name;
prop.putJSON("nav-protocols_element_" + i + "_name", name);
// NOTE(review): name is concatenated into the anchor markup as-is; verify that it cannot carry HTML special characters
prop.put("nav-protocols_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, theQuery, theQuery.queryStringForUrl() + "+" + pnav, theQuery.urlMask.toString(), theQuery.navigators).toString() + "\">" + name + " (" + count + ")</a>");
prop.putJSON("nav-protocols_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + pnav, theQuery.urlMask.toString(), theQuery.navigators).toString());
prop.put("nav-protocols_element_" + i + "_count", count);
prop.put("nav-protocols_element_" + i + "_modifier", "protocol:'" + name + "'");
// "_nl" is set to 1 on every entry here and reset to 0 on the last one below
// (presumably a "more entries follow" flag for the consuming template - TODO confirm)
prop.put("nav-protocols_element_" + i + "_nl", 1);
i++;
}
// number of entries actually written
prop.put("nav-protocols_element", i);
// step back to the index of the last written entry and clear its "_nl" flag
// NOTE(review): if the loop body never ran, i is -1 here and the key below refers to element -1
i--;
prop.put("nav-protocols_element_" + i + "_nl", 0);
}
// filetype navigators
// Fills the "nav-filetypes" template properties from the search event's file type
// navigator: a 0/1 switch on "nav-filetypes" plus up to 20 "nav-filetypes_element_<i>_*" entries.
final ScoreMap<String> filetypeNavigator = theSearch.getFiletypeNavigator();
if (filetypeNavigator == null || filetypeNavigator.isEmpty()) {
// nothing to offer: select alternative 0 of the nav-filetypes template switch
prop.put("nav-filetypes", 0);
} else {
prop.put("nav-filetypes", 1);
// NOTE(review): keys(false) presumably iterates in descending score order - confirm against ScoreMap
navigatorIterator = filetypeNavigator.keys(false);
int i = 0;
String tnav;
// the list is capped at 20 entries
while (i < 20 && navigatorIterator.hasNext()) {
// NOTE(review): the count is looked up with the trimmed key; this only matches if the stored key has no surrounding whitespace
name = navigatorIterator.next().trim();
count = filetypeNavigator.get(name);
// query modifier that is appended (joined with "+") to the current query string to narrow the search to this file type
tnav = "filetype:" + name;
prop.putJSON("nav-filetypes_element_" + i + "_name", name);
// NOTE(review): name is concatenated into the anchor markup as-is; verify that it cannot carry HTML special characters
prop.put("nav-filetypes_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, theQuery, theQuery.queryStringForUrl() + "+" + tnav, theQuery.urlMask.toString(), theQuery.navigators).toString() + "\">" + name + " (" + count + ")</a>");
prop.putJSON("nav-filetypes_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + tnav, theQuery.urlMask.toString(), theQuery.navigators).toString());
prop.put("nav-filetypes_element_" + i + "_count", count);
prop.put("nav-filetypes_element_" + i + "_modifier", "filetype:'" + name + "'");
// "_nl" is set to 1 on every entry here and reset to 0 on the last one below
// (presumably a "more entries follow" flag for the consuming template - TODO confirm)
prop.put("nav-filetypes_element_" + i + "_nl", 1);
i++;
}
// number of entries actually written
prop.put("nav-filetypes_element", i);
// step back to the index of the last written entry and clear its "_nl" flag
// NOTE(review): if the loop body never ran, i is -1 here and the key below refers to element -1
i--;
prop.put("nav-filetypes_element_" + i + "_nl", 0);
}
// about box
final String aboutBody = env.getConfig("about.body", "");
final String aboutHeadline = env.getConfig("about.headline", "");

View File

@ -79,14 +79,19 @@ public final class RankingProcess extends Thread {
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final ScoreMap<String> ref; // reference score computation for the commonSense heuristic
private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash
private final Map<String, byte[]> hostResolver; // a mapping from a host hash (6 bytes) to the full url hash of one of these urls that have the host hash
private final ScoreMap<String> authorNavigator;
private final ScoreMap<String> namespaceNavigator;
private final ReferenceOrder order;
private final long startTime;
private boolean addRunning;
// navigation scores
private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash
private final ScoreMap<String> authorNavigator; // a counter for the appearances of authors
private final ScoreMap<String> namespaceNavigator; // a counter for name spaces
private final ScoreMap<String> protocolNavigator; // a counter for protocol types
private final ScoreMap<String> filetypeNavigator; // a counter for file types
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@ -113,6 +118,8 @@ public final class RankingProcess extends Thread {
this.hostResolver = new ConcurrentHashMap<String, byte[]>();
this.authorNavigator = new ConcurrentScoreMap<String>();
this.namespaceNavigator = new ConcurrentScoreMap<String>();
this.protocolNavigator = new ConcurrentScoreMap<String>();
this.filetypeNavigator = new ConcurrentScoreMap<String>();
this.ref = new ConcurrentScoreMap<String>();
this.feeders = 1;
this.startTime = System.currentTimeMillis();
@ -516,6 +523,14 @@ public final class RankingProcess extends Thread {
}
}
// protocol navigation
final String protocol = metadata.url().getProtocol();
this.protocolNavigator.inc(protocol);
// file type navigation
final String fileext = metadata.url().getFileExtension();
if (fileext.length() > 0) this.filetypeNavigator.inc(fileext);
// check Scanner
if (!Scanner.acceptURL(metadata.url())) {
this.sortout++;
@ -623,6 +638,18 @@ public final class RankingProcess extends Thread {
return result;
}
/**
 * Returns the protocol navigator, i.e. the counter of URL protocols seen in the search results.
 *
 * An empty score map is returned when the query's navigator list is neither "all" nor
 * contains "protocol", or when fewer than two different protocols have been counted
 * (a navigator with a single entry offers no choice to the user).
 *
 * This getter does not modify the internal counter: the counter is incremented elsewhere
 * in this class while results are processed, so clearing it on read would throw away
 * counts that were already collected and distort later calls.
 *
 * @return the live protocol counter, or a fresh empty map if the navigator must not be shown
 */
public ScoreMap<String> getProtocolNavigator() {
    final boolean requested = this.query.navigators.equals("all") || this.query.navigators.indexOf("protocol") >= 0;
    if (!requested) return new ClusteredScoreMap<String>();
    // navigators with one entry are not useful; hide them without touching the collected counts
    if (this.protocolNavigator.sizeSmaller(2)) return new ClusteredScoreMap<String>();
    return this.protocolNavigator;
}
/**
 * Returns the file type navigator, i.e. the counter of URL file extensions seen in the search results.
 *
 * An empty score map is returned when the query's navigator list is neither "all" nor
 * contains "filetype", or when fewer than two different file types have been counted
 * (a navigator with a single entry offers no choice to the user).
 *
 * This getter does not modify the internal counter: the counter is incremented elsewhere
 * in this class while results are processed, so clearing it on read would throw away
 * counts that were already collected and distort later calls.
 *
 * @return the live file type counter, or a fresh empty map if the navigator must not be shown
 */
public ScoreMap<String> getFiletypeNavigator() {
    final boolean requested = this.query.navigators.equals("all") || this.query.navigators.indexOf("filetype") >= 0;
    if (!requested) return new ClusteredScoreMap<String>();
    // navigators with one entry are not useful; hide them without touching the collected counts
    if (this.filetypeNavigator.sizeSmaller(2)) return new ClusteredScoreMap<String>();
    return this.filetypeNavigator;
}
public static final Comparator<Map.Entry<String, Integer>> mecomp = new Comparator<Map.Entry<String, Integer>>() {
public int compare(final Map.Entry<String, Integer> o1, final Map.Entry<String, Integer> o2) {
if (o1.getValue().intValue() < o2.getValue().intValue()) return 1;

View File

@ -348,6 +348,14 @@ public final class SearchEvent {
return this.rankingProcess.getAuthorNavigator();
}
/**
 * Delegates to the ranking process of this search event.
 *
 * @return whatever the ranking process reports as its protocol navigator
 */
public ScoreMap<String> getProtocolNavigator() {
    final ScoreMap<String> protocols = this.rankingProcess.getProtocolNavigator();
    return protocols;
}
/**
 * Delegates to the ranking process of this search event.
 *
 * @return whatever the ranking process reports as its file type navigator
 */
public ScoreMap<String> getFiletypeNavigator() {
    final ScoreMap<String> filetypes = this.rankingProcess.getFiletypeNavigator();
    return filetypes;
}
public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) {
synchronized (this.heuristics) {
this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));

View File

@ -29,7 +29,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
@ -54,6 +53,10 @@ public class migration {
migrateWorkFiles(sb);
}
installSkins(sb); // FIXME: yes, bad fix for quick release 0.47
// add new navigation
if (sb.getConfig("search.navigation", "").equals("hosts,authors,namespace,topics")) {
sb.setConfig("search.navigation", "hosts,authors,namespace,topics,filetype,protocol");
}
}
/*
* remove the static defaultfiles. We use them through a overlay now.
@ -78,7 +81,7 @@ public class migration {
if(file.exists())
delete(file);
}
/*
* copy skins from the release to DATA/SKINS.
*/
@ -88,10 +91,10 @@ public class migration {
if (defaultSkinsPath.exists()) {
final List<String> skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath());
mkdirs(skinsPath);
for (String skinFile : skinFiles){
for (final String skinFile : skinFiles){
if (skinFile.endsWith(".css")){
File from = new File(defaultSkinsPath, skinFile);
File to = new File(skinsPath, skinFile);
final File from = new File(defaultSkinsPath, skinFile);
final File to = new File(skinsPath, skinFile);
if (from.lastModified() > to.lastModified()) try {
FileUtils.copy(from, to);
} catch (final IOException e) {}
@ -141,7 +144,7 @@ public class migration {
}
try {
sb.initBookmarks();
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}
@ -165,7 +168,7 @@ public class migration {
file.delete();
} catch (final IOException e) {
}
file = new File(sb.getDataPath(), "DATA/SETTINGS/wiki-bkp.db");
if (file.exists()) {
Log.logInfo("MIGRATION", "Migrating wiki-bkp.db to "+ sb.workPath);
@ -173,16 +176,16 @@ public class migration {
try {
FileUtils.copy(file, file2);
file.delete();
} catch (final IOException e) {}
} catch (final IOException e) {}
}
try {
sb.initWiki();
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}
file=new File(sb.getDataPath(), "DATA/SETTINGS/message.db");
if(file.exists()){
Log.logInfo("MIGRATION", "Migrating message.db to "+ sb.workPath);
@ -194,7 +197,7 @@ public class migration {
} catch (final IOException e) {}
try {
sb.initMessages();
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}
@ -207,7 +210,7 @@ public class migration {
sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(acc)));
sb.setConfig("adminAccount", "");
}
// fix unsafe old passwords
if ((acc = sb.getConfig("proxyAccountBase64", "")).length() > 0) {
sb.setConfig("proxyAccountBase64MD5", Digest.encodeMD5Hex(acc));
@ -224,14 +227,14 @@ public class migration {
}
public static void migrateSwitchConfigSettings(final Switchboard sb) {
// migration for additional parser settings
String value = "";
//Locales in DATA, because DATA must be writable, htroot not.
if(sb.getConfig("locale.translated_html", "DATA/LOCALE/htroot").equals("htroot/locale")){
sb.setConfig("locale.translated_html", "DATA/LOCALE/htroot");
}
// migration for blacklists
if ((value = sb.getConfig("proxyBlackListsActive","")).length() > 0) {
sb.setConfig("proxy.BlackLists", value);
@ -239,16 +242,16 @@ public class migration {
sb.setConfig("dht.BlackLists", value);
sb.setConfig("search.BlackLists", value);
sb.setConfig("surftips.BlackLists", value);
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
sb.setConfig("proxyBlackListsActive", "");
}
// migration of http specific crawler settings
if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) {
sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate"));
sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5"));
sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
}
sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
}
}
}