From bfed9c2da68d1b2d261a236cd8a774ee25e164e2 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 5 Mar 2008 21:46:55 +0000 Subject: [PATCH] - some refactoring in search process - separated sidebars in new search interface and placed them in their own files which can be put in into the search page like plug-ins git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4529 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControlRWIs_p.html | 5 - htroot/IndexControlRWIs_p.java | 9 +- htroot/yacy/user/sidebar_history.html | 14 ++ htroot/yacy/user/sidebar_history.java | 68 +++++++ htroot/yacy/user/sidebar_navigation.html | 37 ++++ htroot/yacy/user/sidebar_navigation.java | 173 ++++++++++++++++++ htroot/yacy/user/ysearch.html | 3 +- htroot/yacy/user/ysearchitem.html | 41 ----- htroot/yacy/user/ysearchitem.java | 146 --------------- source/de/anomic/plasma/plasmaSearchAPI.java | 7 +- .../de/anomic/plasma/plasmaSearchEvent.java | 4 +- .../de/anomic/plasma/plasmaSearchQuery.java | 7 +- .../plasma/plasmaSearchRankingProcess.java | 64 +------ .../de/anomic/plasma/plasmaSwitchboard.java | 1 + source/de/anomic/server/serverProcessor.java | 19 ++ source/de/anomic/yacy/yacyURL.java | 46 +++-- 16 files changed, 368 insertions(+), 276 deletions(-) create mode 100644 htroot/yacy/user/sidebar_history.html create mode 100644 htroot/yacy/user/sidebar_history.java create mode 100644 htroot/yacy/user/sidebar_navigation.html create mode 100644 htroot/yacy/user/sidebar_navigation.java diff --git a/htroot/IndexControlRWIs_p.html b/htroot/IndexControlRWIs_p.html index eb92aac4d..845735c4e 100644 --- a/htroot/IndexControlRWIs_p.html +++ b/htroot/IndexControlRWIs_p.html @@ -92,11 +92,6 @@ 100   1000   -
Ordering of list:
-
by Ranking   - by URL   - by URL Hash   -
diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 2b1e76515..9f811cd93 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -77,7 +77,6 @@ public class IndexControlRWIs_p { // default values String keystring = post.get("keystring", "").trim(); String keyhash = post.get("keyhash", "").trim(); - int sortorder = post.getInt("ordering", 0); prop.putHTML("keystring", keystring); prop.put("keyhash", keyhash); @@ -89,7 +88,7 @@ public class IndexControlRWIs_p { if (post.containsKey("keystringsearch")) { keyhash = plasmaCondenser.word2hash(keystring); prop.put("keyhash", keyhash); - final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder); + final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null); if (ranking.filteredCount() == 0) { prop.put("searchresult", 1); prop.put("searchresult_word", keystring); @@ -100,7 +99,7 @@ public class IndexControlRWIs_p { if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { prop.put("keystring", "<not possible to compute word from hash>"); } - final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder); + final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null); if (ranking.filteredCount() == 0) { prop.put("searchresult", 2); prop.put("searchresult_wordhash", keyhash); @@ -159,8 +158,8 @@ public class IndexControlRWIs_p { } kelondroBitfield flags = plasmaSearchAPI.compileFlags(post); int count = (post.get("lines", "all").equals("all")) ? 
-1 : post.getInt("lines", -1); - final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder); - plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, flags, count, sortorder); + final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags); + plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, flags, count); } // transfer to other peer diff --git a/htroot/yacy/user/sidebar_history.html b/htroot/yacy/user/sidebar_history.html new file mode 100644 index 000000000..27f31cf66 --- /dev/null +++ b/htroot/yacy/user/sidebar_history.html @@ -0,0 +1,14 @@ +#(history)#:: +
+

Recent Searches

+
+ +

The search history is only visible for users from host #[host]#

+
+
+#(/history)# + diff --git a/htroot/yacy/user/sidebar_history.java b/htroot/yacy/user/sidebar_history.java new file mode 100644 index 000000000..834f0da73 --- /dev/null +++ b/htroot/yacy/user/sidebar_history.java @@ -0,0 +1,68 @@ +// sidebar_history.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 03.03.2008 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import java.util.HashSet; +import java.util.Iterator; + +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaSearchQuery; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; + +public class sidebar_history { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + final plasmaSwitchboard sb = (plasmaSwitchboard) env; + final serverObjects prop = new serverObjects(); + + // list search history + Iterator i = sb.localSearches.iterator(); + String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); + plasmaSearchQuery query; + int c = 0; + HashSet visibleQueries = new HashSet(); + while (i.hasNext()) { + query = i.next(); + if (query.resultcount == 0) continue; + if (query.offset != 0) continue; + if (!query.host.equals(client)) continue; // the search history should only be visible from the user who initiated the search + if (visibleQueries.contains(query.queryString)) continue; // avoid doubles + visibleQueries.add(query.queryString); + prop.put("history_list_" + c + "_querystring", query.queryString); + prop.put("history_list_" + c + "_searchdom", query.searchdom()); + prop.put("history_list_" + c + "_contentdom", query.contentdom()); + c++; + if (c >= 10) break; + } + prop.put("history_list", c); + prop.put("history_host", client); + if (c == 0) prop.put("history", 0); else prop.put("history", 1); // switch on if there is anything to see + + return prop; + } + +} diff --git a/htroot/yacy/user/sidebar_navigation.html b/htroot/yacy/user/sidebar_navigation.html new file mode 100644 index 000000000..e7d0d9cc8 --- /dev/null +++ b/htroot/yacy/user/sidebar_navigation.html @@ -0,0 +1,37 @@ +#(rssreferences)#:: + 
+#{words}# +#[word]# +#{/words}# + +#(/rssreferences)# +#(navigation)#:: + +#(/navigation)# diff --git a/htroot/yacy/user/sidebar_navigation.java b/htroot/yacy/user/sidebar_navigation.java new file mode 100644 index 000000000..4ae133e7f --- /dev/null +++ b/htroot/yacy/user/sidebar_navigation.java @@ -0,0 +1,173 @@ +// sidebar_navigation.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 03.03.2008 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; + +import de.anomic.http.httpHeader; +import de.anomic.kelondro.kelondroMSetTools; +import de.anomic.kelondro.kelondroNaturalOrder; +import de.anomic.plasma.plasmaSearchEvent; +import de.anomic.plasma.plasmaSearchQuery; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; + +public class sidebar_navigation { + + private static final int MAX_TOPWORDS = 24; + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + final serverObjects prop = new serverObjects(); + + String eventID = post.get("eventID", ""); + boolean rss = post.get("rss", "false").equals("true"); + + // default settings for blank item + prop.put("navigation", "0"); + prop.put("rssreferences", "0"); + + // find search event + plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(eventID); + if (theSearch == null) { + // the event does not exist, show empty page + return prop; + } + plasmaSearchQuery theQuery = theSearch.getQuery(); + int offset = theQuery.neededResults() - theQuery.displayResults(); + int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(); + + // attach the bottom line with search references (topwords) + final Set references = theSearch.references(20); + if (references.size() > 0) { + // get the topwords + final TreeSet topwords = new TreeSet(kelondroNaturalOrder.naturalComparator); + String tmp = ""; + Iterator i = references.iterator(); + while (i.hasNext()) { + tmp = i.next(); + if (tmp.matches("[a-z]+")) { + topwords.add(tmp); + } + } + + // filter out the badwords + final TreeSet filteredtopwords = 
kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords); + if (filteredtopwords.size() > 0) { + kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords); + } + + // avoid stopwords being topwords + if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) { + if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) { + kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords); + } + } + + if (rss) { + String word; + int hintcount = 0; + final Iterator iter = topwords.iterator(); + while (iter.hasNext()) { + word = (String) iter.next(); + if (word != null) { + prop.putHTML("rssreferences_words_" + hintcount + "_word", word); + } + prop.put("rssreferences_words", hintcount); + if (hintcount++ > MAX_TOPWORDS) { + break; + } + } + prop.put("rssreferences", "1"); + } else { + String word; + int hintcount = 0; + final Iterator iter = topwords.iterator(); + while (iter.hasNext()) { + word = (String) iter.next(); + if ((theQuery == null) || (theQuery.queryString == null)) break; + if (word != null) { + prop.putHTML("navigation_topwords_words_" + hintcount + "_word", word); + prop.putHTML("navigation_topwords_words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word); + prop.put("navigation_topwords_words_" + hintcount + "_count", theQuery.displayResults()); + prop.put("navigation_topwords_words_" + hintcount + "_offset", "0"); + prop.put("navigation_topwords_words_" + hintcount + "_contentdom", theQuery.contentdom()); + prop.put("navigation_topwords_words_" + hintcount + "_resource", theQuery.searchdom()); + prop.put("navigation_topwords_words_" + hintcount + "_zonecode", theQuery.zonecode); + } + prop.put("navigation_topwords_words", hintcount); + if (hintcount++ > MAX_TOPWORDS) { + break; + } + } + prop.put("navigation_topwords", "1"); + } + } + + // compose page navigation + StringBuffer resnav = new StringBuffer(); + int thispage = offset / 
theQuery.displayResults(); + if (thispage == 0) resnav.append("< "); else { + resnav.append(navurla(thispage - 1, theQuery)); + resnav.append("< "); + } + int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount / theQuery.displayResults())); + for (int j = 0; j < numberofpages; j++) { + if (j == thispage) { + resnav.append(""); + resnav.append(j + 1); + resnav.append(" "); + } else { + resnav.append(navurla(j, theQuery)); + resnav.append(j + 1); + resnav.append(" "); + } + } + if (thispage >= numberofpages) resnav.append(">"); else { + resnav.append(navurla(thispage + 1, theQuery)); + resnav.append(">"); + } + prop.put("navigation_resnav", resnav.toString()); + prop.put("navigation", "1"); + + return prop; + } + + private static String navurla(int page, plasmaSearchQuery theQuery) { + return + ""; + } + +} diff --git a/htroot/yacy/user/ysearch.html b/htroot/yacy/user/ysearch.html index f9d5a8c3d..d9ef2383e 100755 --- a/htroot/yacy/user/ysearch.html +++ b/htroot/yacy/user/ysearch.html @@ -141,7 +141,8 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results" diff --git a/htroot/yacy/user/ysearchitem.html b/htroot/yacy/user/ysearchitem.html index 29ad7765e..e4ea68e6c 100644 --- a/htroot/yacy/user/ysearchitem.html +++ b/htroot/yacy/user/ysearchitem.html @@ -38,50 +38,9 @@ #[urlhash]# #(/rss)# -#(rssreferences)#:: - -#{words}# -#[word]# -#{/words}# - -#(/rssreferences)# #(dynamic)#:: #(/dynamic)# -#(navigation)#:: - -#(/navigation)# -#(history)#:: -
-

Recent Searches

-
- -

The search history is only visible for users from host #[host]#

-
-
-#(/history)# - diff --git a/htroot/yacy/user/ysearchitem.java b/htroot/yacy/user/ysearchitem.java index b2fbc7706..bd69106a7 100644 --- a/htroot/yacy/user/ysearchitem.java +++ b/htroot/yacy/user/ysearchitem.java @@ -28,14 +28,9 @@ import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URLEncoder; import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; import java.util.TreeSet; import de.anomic.http.httpHeader; -import de.anomic.kelondro.kelondroMSetTools; -import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProcess; @@ -53,14 +48,12 @@ public class ysearchitem { private static boolean col = true; private static final int namelength = 60; private static final int urllength = 120; - private static final int MAX_TOPWORDS = 24; public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { final plasmaSwitchboard sb = (plasmaSwitchboard) env; final serverObjects prop = new serverObjects(); String eventID = post.get("eventID", ""); - boolean bottomline = post.get("bottomline", "false").equals("true"); boolean rss = post.get("rss", "false").equals("true"); int item = post.getInt("item", -1); boolean auth = ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") || sb.verifyAuthentication(header, true); @@ -69,8 +62,6 @@ public class ysearchitem { prop.put("content", "0"); prop.put("rss", "0"); prop.put("references", "0"); - prop.put("rssreferences", "0"); - prop.put("navigation", "0"); prop.put("dynamic", "0"); // find search event @@ -81,7 +72,6 @@ public class ysearchitem { } plasmaSearchQuery theQuery = theSearch.getQuery(); int offset = theQuery.neededResults() - theQuery.displayResults(); - int totalcount = theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize(); // dynamically update count values if (!rss) { @@ -95,129 +85,7 @@ public class ysearchitem { prop.put("dynamic_resnav", ""); prop.put("dynamic", "1"); } - - if (bottomline) { - // attach the bottom line with search references (topwords) - final Set references = theSearch.references(20); - if (references.size() > 0) { - // get the topwords - final TreeSet topwords = new TreeSet(kelondroNaturalOrder.naturalComparator); - String tmp = ""; - Iterator i = references.iterator(); - while (i.hasNext()) { - tmp = i.next(); - if (tmp.matches("[a-z]+")) { - topwords.add(tmp); - } - } - // filter out the badwords - final TreeSet filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords); - if (filteredtopwords.size() > 0) { - kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords); - } - - // avoid stopwords being topwords - if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) { - if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) { - kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords); - } - } - - if (rss) { - String word; - int hintcount = 0; - final Iterator iter = topwords.iterator(); - while (iter.hasNext()) { - word = (String) iter.next(); - if (word != null) { - prop.putHTML("rssreferences_words_" + hintcount + "_word", word); - } - prop.put("rssreferences_words", hintcount); - if (hintcount++ > MAX_TOPWORDS) { - break; - } - } - prop.put("rssreferences", "1"); - } else { - String word; - int hintcount = 0; - final Iterator iter = topwords.iterator(); - while (iter.hasNext()) { - word = (String) iter.next(); - if ((theQuery == null) || (theQuery.queryString == null)) break; - if (word != null) { - prop.putHTML("navigation_topwords_words_" + hintcount + "_word", word); - prop.putHTML("navigation_topwords_words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" 
+ word); - prop.put("navigation_topwords_words_" + hintcount + "_count", theQuery.displayResults()); - prop.put("navigation_topwords_words_" + hintcount + "_offset", "0"); - prop.put("navigation_topwords_words_" + hintcount + "_contentdom", theQuery.contentdom()); - prop.put("navigation_topwords_words_" + hintcount + "_resource", theQuery.searchdom()); - } - prop.put("navigation_topwords_words", hintcount); - if (hintcount++ > MAX_TOPWORDS) { - break; - } - } - prop.put("navigation_topwords", "1"); - } - - } - - // compose page navigation - StringBuffer resnav = new StringBuffer(); - int thispage = offset / theQuery.displayResults(); - if (thispage == 0) resnav.append("< "); else { - resnav.append(navurla(thispage - 1, theQuery)); - resnav.append("< "); - } - int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount / theQuery.displayResults())); - for (int j = 0; j < numberofpages; j++) { - if (j == thispage) { - resnav.append(""); - resnav.append(j + 1); - resnav.append(" "); - } else { - resnav.append(navurla(j, theQuery)); - resnav.append(j + 1); - resnav.append(" "); - } - } - if (thispage >= numberofpages) resnav.append(">"); else { - resnav.append(navurla(thispage + 1, theQuery)); - resnav.append(">"); - } - prop.put("navigation_resnav", resnav.toString()); - prop.put("navigation", "1"); - - // list search history - Iterator i = sb.localSearches.iterator(); - String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); - plasmaSearchQuery query; - int c = 0; - HashSet visibleQueries = new HashSet(); - while (i.hasNext()) { - query = i.next(); - if (query.resultcount == 0) continue; - if (query.offset != 0) continue; - if (!query.host.equals(client)) continue; // the search history should only be visible from the user who initiated the search - if (visibleQueries.contains(query.queryString)) continue; // avoid doubles - visibleQueries.add(query.queryString); - prop.put("history_list_" + c + "_querystring", query.queryString); - 
prop.put("history_list_" + c + "_searchdom", query.searchdom()); - prop.put("history_list_" + c + "_contentdom", query.contentdom()); - c++; - if (c >= 10) break; - } - prop.put("history_list", c); - prop.put("history_host", client); - if (c == 0) prop.put("history", 0); else prop.put("history", 1); // switch on if there is anything to see - - return prop; - } - - prop.put("rss", "0"); - if (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) { // text search @@ -331,18 +199,4 @@ public class ysearchitem { return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p); } - - private static String navurla(int page, plasmaSearchQuery theQuery) { - return - ""; - } - } diff --git a/source/de/anomic/plasma/plasmaSearchAPI.java b/source/de/anomic/plasma/plasmaSearchAPI.java index 660abd1de..f5ae147ae 100644 --- a/source/de/anomic/plasma/plasmaSearchAPI.java +++ b/source/de/anomic/plasma/plasmaSearchAPI.java @@ -88,9 +88,9 @@ public class plasmaSearchAPI { } } - public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder) { + public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter) { plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, sb.getRanking(), filter); - plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE, 1); + plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, Integer.MAX_VALUE, 1); ranked.execQuery(); if (ranked.filteredCount() == 0) { @@ -114,7 +114,7 @@ public class plasmaSearchAPI { return ranked; } - public static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int maxlines, int ordering) { + public static void genURLList(serverObjects prop, String keyhash, String 
keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int maxlines) { // search for a word hash and generate a list of url links prop.put("genUrlList_keyHash", keyhash); @@ -127,7 +127,6 @@ public class plasmaSearchAPI { prop.put("searchresult", 3); prop.put("genUrlList_flags", (flags == null) ? "" : flags.exportB64()); prop.put("genUrlList_lines", maxlines); - prop.put("genUrlList_ordering", ordering); int i = 0; yacyURL url; indexURLEntry entry; diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 0ffc3b1c2..01ecd77b6 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -123,7 +123,7 @@ public final class plasmaSearchEvent { if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) || (query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) { // do a global search - this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, 2, max_results_preparation, 16); + this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, max_results_preparation, 16); int fetchpeers = 30; @@ -156,7 +156,7 @@ public final class plasmaSearchEvent { serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); } else { // do a local search - this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, 2, max_results_preparation, 2); + this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, max_results_preparation, 2); this.rankedCache.execQuery(); //plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process); diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 87828b98a..d746c092e 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -54,6 
+54,7 @@ import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.server.serverCharBuffer; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaSearchQuery { @@ -80,7 +81,7 @@ public final class plasmaSearchQuery { public int contentdom; public String urlMask; public int domType; - public String domGroupName; + public int zonecode; public int domMaxTargets; public int maxDistance; public kelondroBitfield constraint; @@ -117,7 +118,7 @@ public final class plasmaSearchQuery { this.offset = 0; this.urlMask = ".*"; this.domType = SEARCHDOM_LOCAL; - this.domGroupName = ""; + this.zonecode = yacyURL.language_domain_any_zone; this.domMaxTargets = 0; this.constraint = constraint; this.allofconstraint = false; @@ -148,7 +149,7 @@ public final class plasmaSearchQuery { //this.maximumTime = Math.min(6000, maximumTime); this.urlMask = urlMask; this.domType = domType; - this.domGroupName = domGroupName; + this.zonecode = yacyURL.language_domain_any_zone; this.domMaxTargets = domMaxTargets; this.constraint = constraint; this.allofconstraint = allofconstraint; diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index a30b7ba51..028664cd7 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -40,7 +40,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRWIEntryOrder; -import de.anomic.index.indexRWIRowEntry; import de.anomic.index.indexRWIVarEntry; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBinSearch; @@ -49,6 +48,7 @@ import de.anomic.kelondro.kelondroSortStack; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; import de.anomic.server.serverProfiling; +import de.anomic.yacy.yacyURL; 
public final class plasmaSearchRankingProcess { @@ -59,7 +59,6 @@ public final class plasmaSearchRankingProcess { private HashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack private HashMap handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process private plasmaSearchQuery query; - private int sortorder; private int maxentries; private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize; private indexRWIEntryOrder order; @@ -70,7 +69,7 @@ public final class plasmaSearchRankingProcess { private plasmaWordIndex wordIndex; private HashMap[] localSearchContainerMaps; - public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, plasmaSearchQuery query, int sortorder, int maxentries, int concurrency) { + public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, plasmaSearchQuery query, int maxentries, int concurrency) { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime // sortorder: 0 = hash, 1 = url, 2 = ranking @@ -89,7 +88,6 @@ public final class plasmaSearchRankingProcess { this.ref = new kelondroMScoreCluster(); this.misses = new TreeSet(); this.wordIndex = wordIndex; - this.sortorder = sortorder; this.flagcount = new int[32]; for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} } @@ -120,57 +118,7 @@ public final class plasmaSearchRankingProcess { return; } - if (sortorder == 2) { - insertRanked(index, true, index.size()); - } else { - insertNoOrder(index, true, index.size()); - } - } - - private void insertNoOrder(indexContainer index, boolean local, int fullResource) { - final Iterator en = index.entries(); - // generate a new map where the urls are sorted (not by hash but by the url text) - - if (local) { - this.local_resourceSize += fullResource; - } else { - this.remote_resourceSize += fullResource; - this.remote_peerCount++; - 
this.remote_indexCount += index.size(); - } - - indexRWIVarEntry ientry; - indexURLEntry uentry; - String u; - loop: while (en.hasNext()) { - ientry = new indexRWIVarEntry(en.next()); - - // check constraints - if (!testFlags(ientry)) continue loop; - - // increase flag counts - for (int i = 0; i < 32; i++) { - if (ientry.flags().get(i)) {flagcount[i]++;} - } - - // load url - if (sortorder == 0) { - this.stack.push(ientry, new Long(ientry.urlHash().hashCode())); - this.urlhashes.put(ientry.urlHash(), new Integer(ientry.urlHash().hashCode())); - } else { - uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0); - if (uentry == null) { - this.misses.add(ientry.urlHash()); - } else { - u = uentry.comp().url().toNormalform(false, true); - this.stack.push(ientry, new Long(u.hashCode())); - this.urlhashes.put(ientry.urlHash(), new Integer(u.hashCode())); - } - } - - // interrupt if we have enough - if ((query.neededResults() > 0) && (this.misses.size() + this.stack.size() > query.neededResults())) break loop; - } // end loop + insertRanked(index, true, index.size()); } public void insertRanked(indexContainer index, boolean local, int fullResource) { @@ -222,6 +170,12 @@ public final class plasmaSearchRankingProcess { if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP ) && (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasapp )))) continue; } + // check tld domain + if (!yacyURL.matchesAnyDomDomain(iEntry.urlHash(), this.query.zonecode)) { + // filter out all tld that do not match with wanted tld domain + continue; + } + // insert if ((maxentries < 0) || (stack.size() < maxentries)) { // in case that we don't have enough yet, accept any new entry diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 7b2eadab8..da2d64e84 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1445,6 +1445,7 @@ public final class plasmaSwitchboard extends 
serverAbstractSwitch implements ser if (hostAddress == null) return ((this.remoteProxyConfig != null) && (this.remoteProxyConfig.useProxy())); // check if this is a local address and we are allowed to index local pages: boolean local = hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); + //assert local == yacyURL.isLocalDomain(url.hash()); // TODO: remove the dnsResolve above! return (this.acceptGlobalURLs && !local) || (this.acceptLocalURLs && local); } diff --git a/source/de/anomic/server/serverProcessor.java b/source/de/anomic/server/serverProcessor.java index 06992be4d..75efc9b84 100644 --- a/source/de/anomic/server/serverProcessor.java +++ b/source/de/anomic/server/serverProcessor.java @@ -24,10 +24,29 @@ package de.anomic.server; +import java.util.concurrent.LinkedBlockingQueue; + public class serverProcessor { public static final int availableCPU = Runtime.getRuntime().availableProcessors(); public static int useCPU = availableCPU; + + public static class queue { + String nickname; + int priority; + serverProcess implementation; + LinkedBlockingQueue inputQueue; + LinkedBlockingQueue outputQueue; + + public queue(String nickname, int priority, serverProcess implementation) { + this.nickname = nickname; + this.priority = priority; + this.implementation = implementation; + this.inputQueue = new LinkedBlockingQueue(); + this.outputQueue = new LinkedBlockingQueue(); + } + } + } diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index 86cb5ed4d..78ffad860 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -139,7 +139,7 @@ public class yacyURL { "UY=Uruguay", "VE=Venezuela" }; - private static final String[] TLD_EuropaRussia = { + private static final String[] TLD_EuropeRussia = { // includes also countries that are mainly french- dutch- speaking // and culturally close to europe "AD=Andorra", @@ -373,20 +373,30 @@ public class yacyURL { } } } - + + public static final int 
language_domain_europe_zone = 128 + 1; //{0, 7}; + public static final int language_domain_english_zone = 128 + 16 + 64; //{4, 6, 7}; + public static final int language_domain_spanish_zone = 128 + 2; //{1, 7}; + public static final int language_domain_asia_zone = 128 + 4; //{2, 7}; + public static final int language_domain_middleeast_zone = 128 + 8; //{3, 7}; + public static final int language_domain_africa_zone = 128 + 32; //{5, 7}; + public static final int language_domain_any_zone = 255; + + public static final String[] regions = {"europe", "english", "spanish", "asia", "middleeast", "africa"}; + static { // create a dummy hash dummyHash = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-"; // assign TLD-ids and names - insertTLDProps(TLD_EuropaRussia, 0); - insertTLDProps(TLD_MiddleSouthAmerica, 1); - insertTLDProps(TLD_SouthEastAsia, 2); - insertTLDProps(TLD_MiddleEastWestAsia, 3); - insertTLDProps(TLD_NorthAmericaOceania, 4); - insertTLDProps(TLD_Africa, 5); - insertTLDProps(TLD_Generic, 6); + insertTLDProps(TLD_EuropeRussia, 0); // European languages but no english + insertTLDProps(TLD_MiddleSouthAmerica, 1); // mainly spanish-speaking countries + insertTLDProps(TLD_SouthEastAsia, 2); // asia + insertTLDProps(TLD_MiddleEastWestAsia, 3); // middle east + insertTLDProps(TLD_NorthAmericaOceania, 4); // english-speaking countries + insertTLDProps(TLD_Africa, 5); // africa + insertTLDProps(TLD_Generic, 6); // anything else, mixed languages, mainly english // the id=7 is used to flag local addresses } @@ -971,7 +981,7 @@ public class yacyURL { tld = host.substring(p + 1); dom = host.substring(0, p); } - Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses + Integer ID = (serverDomains.isLocal(tld)) ? null : TLDID.get(tld); // identify local addresses int id = (ID == null) ? 
7 : ID.intValue(); // local addresses are flagged with id=7 boolean isHTTP = this.protocol.equals("http"); p = dom.lastIndexOf('.'); // locate subdomain @@ -1083,14 +1093,22 @@ public class yacyURL { // returns the ID of the domain of the domain assert (urlHash != null); assert (urlHash.length() == 12) : "urlhash = " + urlHash; - int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); - return (flagbyte & 12) >> 2; + return (kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)) & 12) >> 2; /* NOTE(review): mask 12 keeps only two bits, so this can return 0..3 only, although domain ids up to 7 (local) are assigned - confirm the mask against the flag byte layout */ } - public static boolean isGlobalDomain(String urlhash) { - return domDomain(urlhash) != 7; + public static boolean isLocalDomain(String urlhash) { + return domDomain(urlhash) == 7; } + public static boolean isDomDomain(String urlHash, int id) { + return domDomain(urlHash) == id; + } + + public static boolean matchesAnyDomDomain(String urlHash, int idset) { + /* idset is a bit set of accepted domain ids: bit n is set when domain id n is accepted (see the language_domain_*_zone constants); returns true if the domain id of urlHash is a member of that set */ + return ((1 << domDomain(urlHash)) & idset) != 0; + } + // checks for local/global IP range and local IP public boolean isLocal() { return serverDomains.isLocal(this.host);