- some refactoring in search process

- separated sidebars in new search interface and placed them in their own files
  which can be put into the search page like plug-ins

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4529 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-03-05 21:46:55 +00:00
parent a8f54dd244
commit bfed9c2da6
16 changed files with 368 additions and 276 deletions

View File

@ -92,11 +92,6 @@
<input type="radio" name="lines" value="100" checked="checked" />100&nbsp;&nbsp;
<input type="radio" name="lines" value="1000" />1000&nbsp;&nbsp;
</dd>
<dt class="TableCellDark">Ordering of list:</dt>
<dd><input type="radio" name="ordering" value="2" checked="checked" />by Ranking&nbsp;&nbsp;
<input type="radio" name="ordering" value="0"/>by URL&nbsp;&nbsp;
<input type="radio" name="ordering" value="1" />by URL Hash&nbsp;&nbsp;
</dd>
<dt class="TableCellLight"></dt>
<dd><input type="submit" name="urllist" value="List Selected URLs" />
</dd>

View File

@ -77,7 +77,6 @@ public class IndexControlRWIs_p {
// default values
String keystring = post.get("keystring", "").trim();
String keyhash = post.get("keyhash", "").trim();
int sortorder = post.getInt("ordering", 0);
prop.putHTML("keystring", keystring);
prop.put("keyhash", keyhash);
@ -89,7 +88,7 @@ public class IndexControlRWIs_p {
if (post.containsKey("keystringsearch")) {
keyhash = plasmaCondenser.word2hash(keystring);
prop.put("keyhash", keyhash);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 1);
prop.put("searchresult_word", keystring);
@ -100,7 +99,7 @@ public class IndexControlRWIs_p {
if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
}
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 2);
prop.put("searchresult_wordhash", keyhash);
@ -159,8 +158,8 @@ public class IndexControlRWIs_p {
}
kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder);
plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, flags, count, sortorder);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags);
plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, flags, count);
}
// transfer to other peer

View File

@ -0,0 +1,14 @@
#(history)#::
<div id="partners" class="boxed">
<h2 class="title">Recent Searches</h2>
<div class="content">
<ul>
#{list}#
<li><a href="/yacy/user/ysearch.html?search=#[querystring]#&amp;resource=#[searchdom]#&amp;contentdom=#[contentdom]#">#[querystring]#</a></li>
#{/list}#
</ul>
<p>The search history is only visible for users from host #[host]#</p>
</div>
</div>
#(/history)#

View File

@ -0,0 +1,68 @@
// sidebar_history.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 03.03.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind the "Recent Searches" sidebar template.
 *
 * It walks the switchboard's list of local searches and exposes the ones that
 * were issued from the requesting client to the template under the
 * <code>history_*</code> keys.
 */
public class sidebar_history {

    /** Maximum number of history entries handed to the template. */
    private static final int MAX_HISTORY_ENTRIES = 10;

    /**
     * Fills the template properties for the search history box.
     *
     * @param header request header; the client address is read from
     *               {@code httpHeader.CONNECTION_PROP_CLIENTIP}
     * @param post   request parameters (not read by this servlet)
     * @param env    the switchboard; must be a {@code plasmaSwitchboard}
     * @return properties with {@code history_list} (number of entries),
     *         {@code history_list_N_querystring/_searchdom/_contentdom},
     *         {@code history_host} and the switch {@code history}
     *         (1 if at least one entry is listed, 0 otherwise)
     */
    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
        final plasmaSwitchboard sb = (plasmaSwitchboard) env;
        final serverObjects prop = new serverObjects();

        // list search history
        final String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
        final HashSet<String> visibleQueries = new HashSet<String>();
        final Iterator<plasmaSearchQuery> i = sb.localSearches.iterator();
        int c = 0;
        while (c < MAX_HISTORY_ENTRIES && i.hasNext()) {
            final plasmaSearchQuery query = i.next();
            if (query.resultcount == 0) {
                continue;
            }
            if (query.offset != 0) {
                continue;
            }
            // the search history should only be visible from the user who initiated the search;
            // comparing from the client side keeps an entry without a host from raising an NPE
            if (client == null || !client.equals(query.host)) {
                continue;
            }
            // an entry without a query string cannot produce a usable link
            if (query.queryString == null) {
                continue;
            }
            // avoid doubles: add() reports whether the string was already listed
            if (!visibleQueries.add(query.queryString)) {
                continue;
            }
            // the query string is user input that the template places into markup, so it goes
            // through putHTML (the call used elsewhere for user-supplied text) instead of put
            prop.putHTML("history_list_" + c + "_querystring", query.queryString);
            prop.put("history_list_" + c + "_searchdom", query.searchdom());
            prop.put("history_list_" + c + "_contentdom", query.contentdom());
            c++;
        }
        prop.put("history_list", c);
        prop.put("history_host", client);
        prop.put("history", (c == 0) ? 0 : 1); // switch on if there is anything to see
        return prop;
    }
}

View File

@ -0,0 +1,37 @@
#(rssreferences)#::
<yacyTopwords:topwords>
#{words}#
<yacyTopwords:item><yacyTopwords:word>#[word]#</yacyTopwords:word></yacyTopwords:item>
#{/words}#
</yacyTopwords:topwords>
#(/rssreferences)#
#(navigation)#::
<div id="navigate" class="boxed">
<h2 class="title">Navigate</h2>
<div class="content">
<p><strong>Page</strong>:
#[resnav]#
</p>
#(topwords)#::
<p><strong>Category</strong>:
<select onchange="window.location.href=this.options[this.selectedIndex].value">
<option selected="selected">-select-</option>
#{words}#
<option value="/yacy/user/ysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=#[zonecode]#">#[word]#</option>
#{/words}#
</select>
</p>
#(/topwords)#
#(languagezone)#::
<p><strong>Language Zone</strong>:
<select onchange="window.location.href=this.options[this.selectedIndex].value">
<option selected="selected">-select-</option>
#{zones}#
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=#[zonecode]#">#[zone]#</option>
#{/zones}#
</select>
</p>
#(/languagezone)#
</div>
</div>
#(/navigation)#

View File

@ -0,0 +1,173 @@
// sidebar_navigation.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 03.03.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind the "Navigate" sidebar template.
 *
 * For a given search event it publishes the topwords of the result (either as
 * RSS references or as "Category" links) and the page navigation line.
 */
public class sidebar_navigation {

    /** Upper bound for the number of topwords handed to the template. */
    private static final int MAX_TOPWORDS = 24;

    /** Only words made of the letters a-z are accepted as topwords; compiled once instead of per word. */
    private static final java.util.regex.Pattern TOPWORD_PATTERN = java.util.regex.Pattern.compile("[a-z]+");

    /**
     * Fills the template properties for the navigation box.
     *
     * @param header request header (not read by this servlet)
     * @param post   request parameters; reads {@code eventID} and {@code rss}
     * @param env    the switch, used to read {@code filterOutStopwordsFromTopwords}
     * @return properties for the template; when no search event exists for the
     *         given id, only the switches {@code navigation} and
     *         {@code rssreferences} are set (both to "0")
     */
    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
        final serverObjects prop = new serverObjects();

        final String eventID = post.get("eventID", "");
        final boolean rss = post.get("rss", "false").equals("true");

        // default settings for blank item
        prop.put("navigation", "0");
        prop.put("rssreferences", "0");

        // find search event
        final plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(eventID);
        if (theSearch == null) {
            // the event does not exist, show empty page
            return prop;
        }
        final plasmaSearchQuery theQuery = theSearch.getQuery();
        final int offset = theQuery.neededResults() - theQuery.displayResults();
        final int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();

        // attach the bottom line with search references (topwords)
        final Set<String> references = theSearch.references(20);
        if (references.size() > 0) {
            final TreeSet<String> topwords = collectTopwords(references, env);
            if (rss) {
                putRssReferences(prop, topwords);
            } else {
                putTopwordNavigation(prop, topwords, theQuery);
            }
        }

        // compose page navigation
        prop.put("navigation_resnav", composePageNavigation(theQuery, offset, totalcount));
        prop.put("navigation", "1");
        return prop;
    }

    /** Selects the a-z-only references and removes badwords and (if configured) stopwords. */
    private static TreeSet<String> collectTopwords(Set<String> references, serverSwitch env) {
        final TreeSet<String> topwords = new TreeSet<String>(kelondroNaturalOrder.naturalComparator);
        for (String candidate : references) {
            if (TOPWORD_PATTERN.matcher(candidate).matches()) {
                topwords.add(candidate);
            }
        }

        // filter out the badwords
        final TreeSet<String> filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
        if (filteredtopwords.size() > 0) {
            kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords);
        }

        // avoid stopwords being topwords
        if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) {
            if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) {
                kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords);
            }
        }
        return topwords;
    }

    /** Publishes the topwords as RSS references and switches the rssreferences section on. */
    private static void putRssReferences(serverObjects prop, TreeSet<String> topwords) {
        int hintcount = 0;
        for (String word : topwords) {
            if (hintcount >= MAX_TOPWORDS) {
                break;
            }
            if (word == null) {
                continue;
            }
            prop.putHTML("rssreferences_words_" + hintcount + "_word", word);
            hintcount++;
        }
        // the list property carries the number of entries written; storing the index of the
        // last entry instead (as before) made the template drop the last topword
        prop.put("rssreferences_words", hintcount);
        prop.put("rssreferences", "1");
    }

    /** Publishes the topwords as "Category" links that extend the current query by one word. */
    private static void putTopwordNavigation(serverObjects prop, TreeSet<String> topwords, plasmaSearchQuery theQuery) {
        int hintcount = 0;
        // without a query string no follow-up search can be composed
        if (theQuery.queryString != null) {
            final String baseSearch = theQuery.queryString.replace(' ', '+');
            for (String word : topwords) {
                if (hintcount >= MAX_TOPWORDS) {
                    break;
                }
                if (word == null) {
                    continue;
                }
                final String prefix = "navigation_topwords_words_" + hintcount;
                prop.putHTML(prefix + "_word", word);
                prop.putHTML(prefix + "_newsearch", baseSearch + "+" + word);
                prop.put(prefix + "_count", theQuery.displayResults());
                prop.put(prefix + "_offset", "0");
                prop.put(prefix + "_contentdom", theQuery.contentdom());
                prop.put(prefix + "_resource", theQuery.searchdom());
                prop.put(prefix + "_zonecode", theQuery.zonecode);
                hintcount++;
            }
        }
        // number of entries written, see putRssReferences
        prop.put("navigation_topwords_words", hintcount);
        prop.put("navigation_topwords", "1");
    }

    /** Builds the "&lt; 1 2 3 ... &gt;" page navigation markup for the current result page. */
    private static String composePageNavigation(plasmaSearchQuery theQuery, int offset, int totalcount) {
        final StringBuilder resnav = new StringBuilder();
        final int thispage = offset / theQuery.displayResults();

        // link to the previous page, plain text on the first page
        if (thispage == 0) {
            resnav.append("&lt;&nbsp;");
        } else {
            resnav.append(navurla(thispage - 1, theQuery));
            resnav.append("<strong>&lt;</strong></a>&nbsp;");
        }

        // at most 10 page links, but always at least one page beyond the current one
        final int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount / theQuery.displayResults()));
        for (int j = 0; j < numberofpages; j++) {
            if (j == thispage) {
                resnav.append("<strong>");
                resnav.append(j + 1);
                resnav.append("</strong>&nbsp;");
            } else {
                resnav.append(navurla(j, theQuery));
                resnav.append(j + 1);
                resnav.append("</a>&nbsp;");
            }
        }

        // link to the next page, plain text when the current page is beyond the listed pages
        if (thispage >= numberofpages) {
            resnav.append("&gt;");
        } else {
            resnav.append(navurla(thispage + 1, theQuery));
            resnav.append("<strong>&gt;</strong></a>");
        }
        return resnav.toString();
    }

    /**
     * Creates the opening anchor tag that links to the given result page of the query.
     *
     * @param page     zero-based page number; the offset is page * displayResults()
     * @param theQuery the query whose parameters are carried over into the link
     * @return the opening {@code <a href="...">} tag; the caller appends the link text and {@code </a>}
     */
    private static String navurla(int page, plasmaSearchQuery theQuery) {
        // NOTE(review): queryString(), urlMask and prefer are concatenated into the href as they
        // are; whether they are already escaped for use in a URL/attribute is not visible here - confirm
        return
        "<a href=\"ysearch.html?search=" + theQuery.queryString() +
        "&amp;count="+ theQuery.displayResults() +
        "&amp;offset=" + (page * theQuery.displayResults()) +
        "&amp;resource=" + theQuery.searchdom() +
        "&amp;urlmaskfilter=" + theQuery.urlMask +
        "&amp;prefermaskfilter=" + theQuery.prefer +
        "&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
        "&amp;contentdom=" + theQuery.contentdom() +
        "&amp;former=" + theQuery.queryString() + "\">";
    }
}

View File

@ -141,7 +141,8 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
<div id="sidebar">
<!-- attach the bottomline -->
<!--#include virtual="/yacy/user/ysearchitem.html?bottomline=true&eventID=#[eventID]#" -->
<!--#include virtual="/yacy/user/sidebar_navigation.html?eventID=#[eventID]#" -->
<!--#include virtual="/yacy/user/sidebar_history.html?eventID=#[eventID]#" -->
</div>
</div>

View File

@ -38,50 +38,9 @@
<guid isPermaLink="false">#[urlhash]#</guid>
</item>
#(/rss)#
#(rssreferences)#::
<yacyTopwords:topwords>
#{words}#
<yacyTopwords:item><yacyTopwords:word>#[word]#</yacyTopwords:word></yacyTopwords:item>
#{/words}#
</yacyTopwords:topwords>
#(/rssreferences)#
#(dynamic)#::
<script type="text/javascript">
statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#", "#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#", "#[remotePeerCount]#");
progressbar.step(1);
</script>
#(/dynamic)#
#(navigation)#::
<div id="navigate" class="boxed">
<h2 class="title">Navigate</h2>
<div class="content">
<p><Strong>Page</strong>:
#[resnav]#
</p>
#(topwords)#::
<p><strong>Category</strong>:
<select onchange="window.location.href=this.options[this.selectedIndex].value">
<option selected="selected">-select-</option>
#{words}#
<option value="/yacy/user/ysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#">#[word]#</option>
#{/words}#
</select>
</p>
#(/topwords)#
</div>
</div>
#(/navigation)#
#(history)#::
<div id="partners" class="boxed">
<h2 class="title">Recent Searches</h2>
<div class="content">
<ul>
#{list}#
<li><a href="/yacy/user/ysearch.html?search=#[querystring]#&amp;resource=#[searchdom]#&amp;contentdom=#[contentdom]#">#[querystring]#</a></li>
#{/list}#
</ul>
<p>The search history is only visible for users from host #[host]#</p>
</div>
</div>
#(/history)#

View File

@ -28,14 +28,9 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProcess;
@ -53,14 +48,12 @@ public class ysearchitem {
private static boolean col = true;
private static final int namelength = 60;
private static final int urllength = 120;
private static final int MAX_TOPWORDS = 24;
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
final serverObjects prop = new serverObjects();
String eventID = post.get("eventID", "");
boolean bottomline = post.get("bottomline", "false").equals("true");
boolean rss = post.get("rss", "false").equals("true");
int item = post.getInt("item", -1);
boolean auth = ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") || sb.verifyAuthentication(header, true);
@ -69,8 +62,6 @@ public class ysearchitem {
prop.put("content", "0");
prop.put("rss", "0");
prop.put("references", "0");
prop.put("rssreferences", "0");
prop.put("navigation", "0");
prop.put("dynamic", "0");
// find search event
@ -81,7 +72,6 @@ public class ysearchitem {
}
plasmaSearchQuery theQuery = theSearch.getQuery();
int offset = theQuery.neededResults() - theQuery.displayResults();
int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
// dynamically update count values
if (!rss) {
@ -95,129 +85,7 @@ public class ysearchitem {
prop.put("dynamic_resnav", "");
prop.put("dynamic", "1");
}
if (bottomline) {
// attach the bottom line with search references (topwords)
final Set<String> references = theSearch.references(20);
if (references.size() > 0) {
// get the topwords
final TreeSet<String> topwords = new TreeSet<String>(kelondroNaturalOrder.naturalComparator);
String tmp = "";
Iterator<String> i = references.iterator();
while (i.hasNext()) {
tmp = i.next();
if (tmp.matches("[a-z]+")) {
topwords.add(tmp);
}
}
// filter out the badwords
final TreeSet<String> filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
if (filteredtopwords.size() > 0) {
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords);
}
// avoid stopwords being topwords
if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) {
if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) {
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords);
}
}
if (rss) {
String word;
int hintcount = 0;
final Iterator<String> iter = topwords.iterator();
while (iter.hasNext()) {
word = (String) iter.next();
if (word != null) {
prop.putHTML("rssreferences_words_" + hintcount + "_word", word);
}
prop.put("rssreferences_words", hintcount);
if (hintcount++ > MAX_TOPWORDS) {
break;
}
}
prop.put("rssreferences", "1");
} else {
String word;
int hintcount = 0;
final Iterator<String> iter = topwords.iterator();
while (iter.hasNext()) {
word = (String) iter.next();
if ((theQuery == null) || (theQuery.queryString == null)) break;
if (word != null) {
prop.putHTML("navigation_topwords_words_" + hintcount + "_word", word);
prop.putHTML("navigation_topwords_words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
prop.put("navigation_topwords_words_" + hintcount + "_count", theQuery.displayResults());
prop.put("navigation_topwords_words_" + hintcount + "_offset", "0");
prop.put("navigation_topwords_words_" + hintcount + "_contentdom", theQuery.contentdom());
prop.put("navigation_topwords_words_" + hintcount + "_resource", theQuery.searchdom());
}
prop.put("navigation_topwords_words", hintcount);
if (hintcount++ > MAX_TOPWORDS) {
break;
}
}
prop.put("navigation_topwords", "1");
}
}
// compose page navigation
StringBuffer resnav = new StringBuffer();
int thispage = offset / theQuery.displayResults();
if (thispage == 0) resnav.append("&lt;&nbsp;"); else {
resnav.append(navurla(thispage - 1, theQuery));
resnav.append("<strong>&lt;</strong></a>&nbsp;");
}
int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount / theQuery.displayResults()));
for (int j = 0; j < numberofpages; j++) {
if (j == thispage) {
resnav.append("<strong>");
resnav.append(j + 1);
resnav.append("</strong>&nbsp;");
} else {
resnav.append(navurla(j, theQuery));
resnav.append(j + 1);
resnav.append("</a>&nbsp;");
}
}
if (thispage >= numberofpages) resnav.append("&gt;"); else {
resnav.append(navurla(thispage + 1, theQuery));
resnav.append("<strong>&gt;</strong></a>");
}
prop.put("navigation_resnav", resnav.toString());
prop.put("navigation", "1");
// list search history
Iterator<plasmaSearchQuery> i = sb.localSearches.iterator();
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
plasmaSearchQuery query;
int c = 0;
HashSet<String> visibleQueries = new HashSet<String>();
while (i.hasNext()) {
query = i.next();
if (query.resultcount == 0) continue;
if (query.offset != 0) continue;
if (!query.host.equals(client)) continue; // the search history should only be visible from the user who initiated the search
if (visibleQueries.contains(query.queryString)) continue; // avoid doubles
visibleQueries.add(query.queryString);
prop.put("history_list_" + c + "_querystring", query.queryString);
prop.put("history_list_" + c + "_searchdom", query.searchdom());
prop.put("history_list_" + c + "_contentdom", query.contentdom());
c++;
if (c >= 10) break;
}
prop.put("history_list", c);
prop.put("history_host", client);
if (c == 0) prop.put("history", 0); else prop.put("history", 1); // switch on if there is anything to see
return prop;
}
prop.put("rss", "0");
if (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) {
// text search
@ -331,18 +199,4 @@ public class ysearchitem {
return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p);
}
private static String navurla(int page, plasmaSearchQuery theQuery) {
return
"<a href=\"ysearch.html?search=" + theQuery.queryString() +
"&amp;count="+ theQuery.displayResults() +
"&amp;offset=" + (page * theQuery.displayResults()) +
"&amp;resource=" + theQuery.searchdom() +
"&amp;urlmaskfilter=" + theQuery.urlMask +
"&amp;prefermaskfilter=" + theQuery.prefer +
"&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
"&amp;contentdom=" + theQuery.contentdom() +
"&amp;former=" + theQuery.queryString() + "\">";
}
}

View File

@ -88,9 +88,9 @@ public class plasmaSearchAPI {
}
}
public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder) {
public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter) {
plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, sb.getRanking(), filter);
plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE, 1);
plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, Integer.MAX_VALUE, 1);
ranked.execQuery();
if (ranked.filteredCount() == 0) {
@ -114,7 +114,7 @@ public class plasmaSearchAPI {
return ranked;
}
public static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int maxlines, int ordering) {
public static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int maxlines) {
// search for a word hash and generate a list of url links
prop.put("genUrlList_keyHash", keyhash);
@ -127,7 +127,6 @@ public class plasmaSearchAPI {
prop.put("searchresult", 3);
prop.put("genUrlList_flags", (flags == null) ? "" : flags.exportB64());
prop.put("genUrlList_lines", maxlines);
prop.put("genUrlList_ordering", ordering);
int i = 0;
yacyURL url;
indexURLEntry entry;

View File

@ -123,7 +123,7 @@ public final class plasmaSearchEvent {
if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
// do a global search
this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, 2, max_results_preparation, 16);
this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, max_results_preparation, 16);
int fetchpeers = 30;
@ -156,7 +156,7 @@ public final class plasmaSearchEvent {
serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
} else {
// do a local search
this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, 2, max_results_preparation, 2);
this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, max_results_preparation, 2);
this.rankedCache.execQuery();
//plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);

View File

@ -54,6 +54,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.yacyURL;
public final class plasmaSearchQuery {
@ -80,7 +81,7 @@ public final class plasmaSearchQuery {
public int contentdom;
public String urlMask;
public int domType;
public String domGroupName;
public int zonecode;
public int domMaxTargets;
public int maxDistance;
public kelondroBitfield constraint;
@ -117,7 +118,7 @@ public final class plasmaSearchQuery {
this.offset = 0;
this.urlMask = ".*";
this.domType = SEARCHDOM_LOCAL;
this.domGroupName = "";
this.zonecode = yacyURL.language_domain_any_zone;
this.domMaxTargets = 0;
this.constraint = constraint;
this.allofconstraint = false;
@ -148,7 +149,7 @@ public final class plasmaSearchQuery {
//this.maximumTime = Math.min(6000, maximumTime);
this.urlMask = urlMask;
this.domType = domType;
this.domGroupName = domGroupName;
this.zonecode = yacyURL.language_domain_any_zone;
this.domMaxTargets = domMaxTargets;
this.constraint = constraint;
this.allofconstraint = allofconstraint;

View File

@ -40,7 +40,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryOrder;
import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexRWIVarEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBinSearch;
@ -49,6 +48,7 @@ import de.anomic.kelondro.kelondroSortStack;
import de.anomic.server.serverCodings;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverProfiling;
import de.anomic.yacy.yacyURL;
public final class plasmaSearchRankingProcess {
@ -59,7 +59,6 @@ public final class plasmaSearchRankingProcess {
private HashMap<String, kelondroSortStack<indexRWIVarEntry>> doubleDomCache; // key = domhash (6 bytes); value = like stack
private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
private plasmaSearchQuery query;
private int sortorder;
private int maxentries;
private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize;
private indexRWIEntryOrder order;
@ -70,7 +69,7 @@ public final class plasmaSearchRankingProcess {
private plasmaWordIndex wordIndex;
private HashMap<String, indexContainer>[] localSearchContainerMaps;
public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, plasmaSearchQuery query, int sortorder, int maxentries, int concurrency) {
public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, plasmaSearchQuery query, int maxentries, int concurrency) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
@ -89,7 +88,6 @@ public final class plasmaSearchRankingProcess {
this.ref = new kelondroMScoreCluster<String>();
this.misses = new TreeSet<String>();
this.wordIndex = wordIndex;
this.sortorder = sortorder;
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
}
@ -120,57 +118,7 @@ public final class plasmaSearchRankingProcess {
return;
}
if (sortorder == 2) {
insertRanked(index, true, index.size());
} else {
insertNoOrder(index, true, index.size());
}
}
private void insertNoOrder(indexContainer index, boolean local, int fullResource) {
final Iterator<indexRWIRowEntry> en = index.entries();
// generate a new map where the urls are sorted (not by hash but by the url text)
if (local) {
this.local_resourceSize += fullResource;
} else {
this.remote_resourceSize += fullResource;
this.remote_peerCount++;
this.remote_indexCount += index.size();
}
indexRWIVarEntry ientry;
indexURLEntry uentry;
String u;
loop: while (en.hasNext()) {
ientry = new indexRWIVarEntry(en.next());
// check constraints
if (!testFlags(ientry)) continue loop;
// increase flag counts
for (int i = 0; i < 32; i++) {
if (ientry.flags().get(i)) {flagcount[i]++;}
}
// load url
if (sortorder == 0) {
this.stack.push(ientry, new Long(ientry.urlHash().hashCode()));
this.urlhashes.put(ientry.urlHash(), new Integer(ientry.urlHash().hashCode()));
} else {
uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
if (uentry == null) {
this.misses.add(ientry.urlHash());
} else {
u = uentry.comp().url().toNormalform(false, true);
this.stack.push(ientry, new Long(u.hashCode()));
this.urlhashes.put(ientry.urlHash(), new Integer(u.hashCode()));
}
}
// interrupt if we have enough
if ((query.neededResults() > 0) && (this.misses.size() + this.stack.size() > query.neededResults())) break loop;
} // end loop
insertRanked(index, true, index.size());
}
public void insertRanked(indexContainer index, boolean local, int fullResource) {
@ -222,6 +170,12 @@ public final class plasmaSearchRankingProcess {
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP ) && (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasapp )))) continue;
}
// check tld domain
if (!yacyURL.matchesAnyDomDomain(iEntry.urlHash(), this.query.zonecode)) {
// filter out all tld that do not match with wanted tld domain
continue;
}
// insert
if ((maxentries < 0) || (stack.size() < maxentries)) {
// in case that we don't have enough yet, accept any new entry

View File

@ -1445,6 +1445,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (hostAddress == null) return ((this.remoteProxyConfig != null) && (this.remoteProxyConfig.useProxy()));
// check if this is a local address and we are allowed to index local pages:
boolean local = hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress();
//assert local == yacyURL.isLocalDomain(url.hash()); // TODO: remove the dnsResolve above!
return (this.acceptGlobalURLs && !local) || (this.acceptLocalURLs && local);
}

View File

@ -24,10 +24,29 @@
package de.anomic.server;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Holder for processor-count settings and for the description of a processing queue.
 */
public class serverProcessor {

    /** Number of processors reported by the runtime when this class is loaded. */
    public static final int availableCPU = Runtime.getRuntime().availableProcessors();

    /** Number of processors to use; starts out as {@link #availableCPU} and may be reassigned. */
    public static int useCPU = availableCPU;

    /**
     * Bundles a process implementation with a name, a priority, and one queue
     * each for its input and output objects.
     *
     * @param <I> type of the objects in the input queue
     * @param <O> type of the objects in the output queue
     */
    public static class queue<I, O> {

        // descriptive attributes of the queue
        String nickname;
        int priority;

        // the process attached to this queue
        serverProcess<I, O> implementation;

        // both queues are created without an explicit capacity
        LinkedBlockingQueue<I> inputQueue;
        LinkedBlockingQueue<O> outputQueue;

        /**
         * Creates the descriptor together with fresh, empty input and output queues.
         *
         * @param nickname       name of the queue
         * @param priority       priority value stored with the queue
         * @param implementation the process stored with the queue
         */
        public queue(String nickname, int priority, serverProcess<I, O> implementation) {
            this.inputQueue = new LinkedBlockingQueue<I>();
            this.outputQueue = new LinkedBlockingQueue<O>();
            this.implementation = implementation;
            this.priority = priority;
            this.nickname = nickname;
        }
    }
}

View File

@ -139,7 +139,7 @@ public class yacyURL {
"UY=Uruguay",
"VE=Venezuela"
};
private static final String[] TLD_EuropaRussia = {
private static final String[] TLD_EuropeRussia = {
// includes also countries that are mainly french- dutch- speaking
// and culturally close to europe
"AD=Andorra",
@ -373,20 +373,30 @@ public class yacyURL {
}
}
}
public static final int language_domain_europe_zone = 128 + 1; //{0, 7};
public static final int language_domain_english_zone = 128 + 16 + 64; //{4, 6, 7};
public static final int language_domain_spanish_zone = 128 + 2; //{1, 7};
public static final int language_domain_asia_zone = 128 + 4; //{2, 7};
public static final int language_domain_middleeast_zone = 128 + 8; //{3, 7};
public static final int language_domain_africa_zone = 128 + 32; //{5, 7};
public static final int language_domain_any_zone = 255;
public static final String[] regions = {"europe", "english", "spanish", "asia", "middleeast", "africa"};
static {
// create a dummy hash
dummyHash = "";
for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-";
// assign TLD-ids and names
insertTLDProps(TLD_EuropaRussia, 0);
insertTLDProps(TLD_MiddleSouthAmerica, 1);
insertTLDProps(TLD_SouthEastAsia, 2);
insertTLDProps(TLD_MiddleEastWestAsia, 3);
insertTLDProps(TLD_NorthAmericaOceania, 4);
insertTLDProps(TLD_Africa, 5);
insertTLDProps(TLD_Generic, 6);
insertTLDProps(TLD_EuropeRussia, 0); // European languages but no english
insertTLDProps(TLD_MiddleSouthAmerica, 1); // mainly spanish-speaking countries
insertTLDProps(TLD_SouthEastAsia, 2); // asia
insertTLDProps(TLD_MiddleEastWestAsia, 3); // middle east
insertTLDProps(TLD_NorthAmericaOceania, 4); // english-speaking countries
insertTLDProps(TLD_Africa, 5); // africa
insertTLDProps(TLD_Generic, 6); // anything else, mixed languages, mainly english
// the id=7 is used to flag local addresses
}
@ -971,7 +981,7 @@ public class yacyURL {
tld = host.substring(p + 1);
dom = host.substring(0, p);
}
Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses
Integer ID = (serverDomains.isLocal(tld)) ? null : TLDID.get(tld); // identify local addresses
int id = (ID == null) ? 7 : ID.intValue(); // local addresses are flagged with id=7
boolean isHTTP = this.protocol.equals("http");
p = dom.lastIndexOf('.'); // locate subdomain
@ -1083,14 +1093,22 @@ public class yacyURL {
// returns the ID of the domain of the domain
assert (urlHash != null);
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11));
return (flagbyte & 12) >> 2;
return (kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)) & 12) >> 2;
}
public static boolean isGlobalDomain(String urlhash) {
return domDomain(urlhash) != 7;
public static boolean isLocalDomain(String urlhash) {
return domDomain(urlhash) == 7;
}
public static boolean isDomDomain(String urlHash, int id) {
return domDomain(urlHash) == id;
}
/**
 * Tests whether the domain id of a url hash is a member of a set of domain ids.
 *
 * @param urlHash the url hash; its domain id is obtained with domDomain(urlHash)
 * @param idset   bit set of accepted domain ids, where a set bit i accepts id i
 *                (the language_domain_*_zone constants are such sets,
 *                e.g. 128 + 1 stands for the ids {0, 7})
 * @return true if the bit that belongs to the hash's domain id is set in idset
 */
public static boolean matchesAnyDomDomain(String urlHash, int idset) {
    // this is a boolean matching on a set of domDomains;
    // the former test "(id | idset) != 0" was true for every non-zero idset,
    // so the id has to be turned into its bit and intersected with the set
    return ((1 << domDomain(urlHash)) & idset) != 0;
}
// checks for local/global IP range and local IP
public boolean isLocal() {
return serverDomains.isLocal(this.host);