mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- fixed/re-implemented media search
- fixed search tips (topwords, now appearing at the bottom of the page) - added search consequences execution (deletion of bad references some time after the search happened) - added some formatting at network table git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4078 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
6c3bcadc1c
commit
8ff5e2c283
|
@ -51,14 +51,14 @@
|
|||
<tr class="TableHeader" valign="bottom">
|
||||
<td>send <strong>M</strong>essage/<br />show <strong>P</strong>rofile/<br />edit <strong>W</strong>iki<br /> </td>
|
||||
<td><strong>Name</strong><br /> </td>
|
||||
<td><strong>Info</strong><br /> </td>
|
||||
<td width="44"><strong>Info</strong><br /> </td>
|
||||
<td><strong>Release/<br />SVN</strong><br /> </td>
|
||||
<td><strong>PPM</strong><br /> </td>
|
||||
<td><strong>QPH</strong><br /> </td>
|
||||
<td><strong>Last<br />Seen</strong><br /> <a href="/Network.html?page=#[page]#&sort=LastSeen&order=up"><</a> <a href="/Network.html?page=#[page]#&sort=LastSeen&order=down">></a></td>
|
||||
<td><strong>UTC</strong><br />Offset<br /> </td>
|
||||
<td><strong>Location</strong><br /><br /> </td>
|
||||
<td><strong>Uptime</strong><br /> <a href="/Network.html?page=#[page]#&sort=Uptime&order=up"><</a> <a href="/Network.html?page=#[page]#&sort=Uptime&order=down">></a></td>
|
||||
<td width="70"><strong>Uptime</strong><br /> <a href="/Network.html?page=#[page]#&sort=Uptime&order=up"><</a> <a href="/Network.html?page=#[page]#&sort=Uptime&order=down">></a></td>
|
||||
<td><strong>Links</strong><br /> <a href="/Network.html?page=#[page]#&sort=LCount&order=up"><</a> <a href="/Network.html?page=#[page]#&sort=LCount&order=down">></a></td>
|
||||
<td><strong>RWIs</strong><br /> <a href="/Network.html?page=#[page]#&sort=ICount&order=up"><</a> <a href="/Network.html?page=#[page]#&sort=ICount&order=down">></a></td>
|
||||
<td><strong>URLs for<br />Remote<br />Crawl</strong><br /> </td>
|
||||
|
|
|
@ -89,6 +89,9 @@ public final class search {
|
|||
// test:
|
||||
// http://localhost:8080/yacy/search.html?query=4galTpdpDM5Q (search for linux)
|
||||
// http://localhost:8080/yacy/search.html?query=gh8DKIhGKXws (search for book)
|
||||
// http://localhost:8080/yacy/search.html?query=UEhMGfGv2vOE (search for kernel)
|
||||
// http://localhost:8080/yacy/search.html?query=ZX-LjaYo74PP (search for help)
|
||||
// http://localhost:8080/yacy/search.html?query=uDqIalxDfM2a (search for mail)
|
||||
// http://localhost:8080/yacy/search.html?query=4galTpdpDM5Qgh8DKIhGKXws&abstracts=auto (search for linux and book, generate abstract automatically)
|
||||
// http://localhost:8080/yacy/search.html?query=&abstracts=4galTpdpDM5Q (only abstracts for linux)
|
||||
|
||||
|
@ -223,7 +226,7 @@ public final class search {
|
|||
|
||||
// prepare reference hints
|
||||
localProcess.startTimer();
|
||||
Object[] ws = theSearch.references();
|
||||
Object[] ws = theSearch.references(10);
|
||||
StringBuffer refstr = new StringBuffer();
|
||||
for (int j = 0; j < ws.length; j++)
|
||||
refstr.append(",").append((String) ws[j]);
|
||||
|
|
|
@ -99,10 +99,6 @@ document.getElementById("Enter").value = "search again - catch up more links";
|
|||
::
|
||||
<p>No Results. (length of search words must be at least 3 characters)</p>
|
||||
::
|
||||
<p>No Results. Please repeat your search to see if there are late-responses from remote peers.</p>
|
||||
::
|
||||
<p>No Results. "<strong>#[wrong_regex]#</strong>" is no valid regular expression. Please go back to the previous page and make sure to enter a valid regular expressions for URL mask and Prefer mask.</p>
|
||||
::
|
||||
<p><strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[linkcount]#</strong> results from a total number of <strong id="totalcount">#[totalcount]#</strong> known#(globalresults)#.::, <strong id="globalcount">#[globalcount]#</strong> links from other YaCy peers.#(/globalresults)#<div id="results"></div></p>
|
||||
<p>Search Result Pages: <span id="pagenav">#[pagenav]#</span></p>
|
||||
::
|
||||
|
@ -111,19 +107,23 @@ document.getElementById("Enter").value = "search again - catch up more links";
|
|||
|
||||
<!-- the search results -->
|
||||
#(combine)#::
|
||||
<p><strong>Refine with</strong>:</p>
|
||||
<p>#{words}#<a href="yacysearch.html?search=#[newsearch]#&Enter=Search&count=#[count]#&offset=#[offset]#&resource=#[resource]#&time=#[time]#">#[word]#</a>#{/words}#</p>
|
||||
<p><strong>Topwords</strong>:
|
||||
#{words}# <a href="yacysearch.html?search=#[newsearch]#&Enter=Search&count=#[count]#&offset=#[offset]#&resource=#[resource]#&time=#[time]#">#[word]#</a>#{/words}#
|
||||
</p>
|
||||
#(/combine)#
|
||||
<script type="text/javascript">
|
||||
var progressbar = new Progressbar(#[results]#, document.getElementById("results"));
|
||||
</script>
|
||||
|
||||
<!-- linklist begin -->
|
||||
<div id="hidden_results"></div>
|
||||
#(resultTable)#::<table width="100%"><tr class="TableHeader"><td width="30%">Media</td><td width="70%">URL</tr>#(/resultTable)#
|
||||
#{results}#
|
||||
<!--#include virtual="yacysearchitem.html?item=#[item]#&eventID=#[eventID]#" -->
|
||||
#{/results}#
|
||||
#(resultTable)#::</table>#(/resultTable)#
|
||||
<!-- linklist end -->
|
||||
|
||||
<!-- attach the bottomline -->
|
||||
<!--#include virtual="yacysearchitem.html?bottomline=true&eventID=#[eventID]#" -->
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -49,14 +49,12 @@
|
|||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.index.indexURLEntry;
|
||||
import de.anomic.kelondro.kelondroBitfield;
|
||||
import de.anomic.kelondro.kelondroMSetTools;
|
||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||
import de.anomic.plasma.plasmaCondenser;
|
||||
import de.anomic.plasma.plasmaParserDocument;
|
||||
import de.anomic.plasma.plasmaSearchEvent;
|
||||
|
@ -78,8 +76,6 @@ import de.anomic.yacy.yacyURL;
|
|||
|
||||
public class yacysearch {
|
||||
|
||||
public static final int MAX_TOPWORDS = 24;
|
||||
|
||||
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
|
||||
|
||||
|
@ -142,7 +138,8 @@ public class yacysearch {
|
|||
prop.put("excluded", 0);
|
||||
prop.put("combine", 0);
|
||||
prop.put("results", "");
|
||||
prop.put("num-results", (searchAllowed) ? 0 : 6);
|
||||
prop.put("resultTable", 0);
|
||||
prop.put("num-results", (searchAllowed) ? 0 : 4);
|
||||
|
||||
return prop;
|
||||
}
|
||||
|
@ -286,7 +283,6 @@ public class yacysearch {
|
|||
long timestamp = System.currentTimeMillis();
|
||||
|
||||
// create a new search event
|
||||
String wrongregex = null;
|
||||
if (plasmaSearchEvent.getEvent(theQuery.id()) == null) {
|
||||
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
|
||||
offset = 0;
|
||||
|
@ -358,7 +354,9 @@ public class yacysearch {
|
|||
prop.put("results_" + i + "_eventID", theQuery.id());
|
||||
}
|
||||
prop.put("results", theQuery.displayResults());
|
||||
|
||||
prop.put("resultTable", (contentdomCode <= 1) ? 0 : 1);
|
||||
prop.put("eventID", theQuery.id()); // for bottomline
|
||||
|
||||
// process result of search
|
||||
if (filtered.size() > 0) {
|
||||
prop.put("excluded", 1);
|
||||
|
@ -374,63 +372,7 @@ public class yacysearch {
|
|||
prop.put("num-results", 1); // no results
|
||||
}
|
||||
} else {
|
||||
final int totalcount = prop.getInt("num-results_totalcount", 0);
|
||||
if (totalcount >= 10) {
|
||||
final Object[] references = (Object[]) prop.get( "references", new String[0]);
|
||||
prop.put("num-results", 5);
|
||||
int hintcount = references.length;
|
||||
if (hintcount > 0) {
|
||||
prop.put("combine", 1);
|
||||
// get the topwords
|
||||
final TreeSet topwords = new TreeSet(kelondroNaturalOrder.naturalOrder);
|
||||
String tmp = "";
|
||||
for (int i = 0; i < hintcount; i++) {
|
||||
tmp = (String) references[i];
|
||||
if (tmp.matches("[a-z]+")) {
|
||||
topwords.add(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
// filter out the badwords
|
||||
final TreeSet filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
|
||||
if (filteredtopwords.size() > 0) {
|
||||
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords);
|
||||
}
|
||||
|
||||
// avoid stopwords being topwords
|
||||
if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) {
|
||||
if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) {
|
||||
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords);
|
||||
}
|
||||
}
|
||||
|
||||
String word;
|
||||
hintcount = 0;
|
||||
final Iterator iter = topwords.iterator();
|
||||
while (iter.hasNext()) {
|
||||
word = (String) iter.next();
|
||||
if (word != null) {
|
||||
prop.put("combine_words_" + hintcount + "_word", word);
|
||||
prop.put("combine_words_" + hintcount + "_newsearch", post.get("search", "").replace(' ', '+') + "+" + word);
|
||||
prop.put("combine_words_" + hintcount + "_count", count);
|
||||
prop.put("combine_words_" + hintcount + "_offset", offset);
|
||||
prop.put("combine_words_" + hintcount + "_resource", ((global) ? "global" : "local"));
|
||||
prop.put("combine_words_" + hintcount + "_time", (searchtime / 1000));
|
||||
}
|
||||
prop.put("combine_words", hintcount);
|
||||
if (hintcount++ > MAX_TOPWORDS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (wrongregex != null) {
|
||||
prop.put("num-results_wrong_regex", wrongregex);
|
||||
prop.put("num-results", 4);
|
||||
} else {
|
||||
prop.put("num-results", 5);
|
||||
}
|
||||
}
|
||||
prop.put("num-results", 3);
|
||||
}
|
||||
|
||||
prop.put("input_cat", "href");
|
||||
|
|
|
@ -20,13 +20,30 @@
|
|||
<p class="urlinfo">#[date]# | YBR-#[ybr]# | <a href="ViewFile.html?urlHash=#[urlhash]#&words=#[words]#">Info</a> | <a href="yacysearch.html?cat=image&url=#[url]#&search=#[former]#">Pictures</a></p>
|
||||
</div>
|
||||
::
|
||||
#{images}#
|
||||
#{items}#
|
||||
<div class="thumbcontainer">
|
||||
<a href="#[href]#" class="thumblink"><img src="/ViewImage.png?maxwidth=96&maxheight=96&code=#[code]#" alt="#[name]#"></a>
|
||||
<div class="TableCellDark"><a href="#[href]#">#[name]#</a></div>
|
||||
</div>
|
||||
#{/images}#
|
||||
#{/items}#
|
||||
::
|
||||
#{items}#
|
||||
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#">#[hrefshort]#</a></tr>
|
||||
#{/items}#
|
||||
::
|
||||
#{items}#
|
||||
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#">#[hrefshort]#</a></tr>
|
||||
#{/items}#
|
||||
::
|
||||
#{items}#
|
||||
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#">#[hrefshort]#</a></tr>
|
||||
#{/items}#
|
||||
#(/content)#
|
||||
#(references)#::
|
||||
<p><strong>Topwords</strong>:
|
||||
#{words}# <a href="yacysearch.html?search=#[newsearch]#&Enter=Search&count=#[count]#&offset=#[offset]#&resource=#[resource]#&time=#[time]#">#[word]#</a>#{/words}#
|
||||
</p>
|
||||
#(/references)#
|
||||
<script type="text/javascript">
|
||||
statistics("#[offset]#", "#[items]#", "#[global]#", "#[total]#");
|
||||
progressbar.step(1);
|
||||
|
|
|
@ -28,9 +28,12 @@ import java.io.UnsupportedEncodingException;
|
|||
import java.net.MalformedURLException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.kelondro.kelondroMSetTools;
|
||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||
import de.anomic.plasma.plasmaSearchEvent;
|
||||
import de.anomic.plasma.plasmaSearchPreOrder;
|
||||
import de.anomic.plasma.plasmaSearchQuery;
|
||||
|
@ -49,12 +52,17 @@ import de.anomic.yacy.yacyURL;
|
|||
|
||||
public class yacysearchitem {
|
||||
|
||||
private static boolean col = true;
|
||||
private static final int namelength = 60;
|
||||
private static final int urllength = 120;
|
||||
private static final int MAX_TOPWORDS = 24;
|
||||
|
||||
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
|
||||
final serverObjects prop = new serverObjects();
|
||||
|
||||
String eventID = post.get("eventID", "");
|
||||
int item = post.getInt("item", -1);
|
||||
boolean bottomline = post.get("bottomline", "false").equals("true");
|
||||
boolean authenticated = sb.adminAuthenticated(header) >= 2;
|
||||
|
||||
// find search event
|
||||
|
@ -62,14 +70,73 @@ public class yacysearchitem {
|
|||
plasmaSearchQuery theQuery = theSearch.getQuery();
|
||||
plasmaSearchRankingProfile ranking = theSearch.getRanking();
|
||||
|
||||
// generate result object
|
||||
plasmaSearchEvent.ResultEntry result = theSearch.oneResult(item);
|
||||
|
||||
// dynamically update count values
|
||||
prop.put("offset", theQuery.neededResults() - theQuery.displayResults() + 1);
|
||||
prop.put("items", item + 1);
|
||||
prop.put("global", theSearch.getGlobalCount());
|
||||
prop.put("total", theSearch.getGlobalCount() + theSearch.getLocalCount());
|
||||
prop.put("items", theQuery.displayResults());
|
||||
|
||||
if (bottomline) {
|
||||
// attach the bottom line with search references (topwords)
|
||||
final Object[] references = theSearch.references(20);
|
||||
int hintcount = references.length;
|
||||
if (hintcount > 0) {
|
||||
prop.put("references", 1);
|
||||
// get the topwords
|
||||
final TreeSet topwords = new TreeSet(kelondroNaturalOrder.naturalOrder);
|
||||
String tmp = "";
|
||||
for (int i = 0; i < hintcount; i++) {
|
||||
tmp = (String) references[i];
|
||||
if (tmp.matches("[a-z]+")) {
|
||||
topwords.add(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
// filter out the badwords
|
||||
final TreeSet filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
|
||||
if (filteredtopwords.size() > 0) {
|
||||
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords);
|
||||
}
|
||||
|
||||
// avoid stopwords being topwords
|
||||
if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) {
|
||||
if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) {
|
||||
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords);
|
||||
}
|
||||
}
|
||||
|
||||
String word;
|
||||
hintcount = 0;
|
||||
final Iterator iter = topwords.iterator();
|
||||
while (iter.hasNext()) {
|
||||
word = (String) iter.next();
|
||||
if (word != null) {
|
||||
prop.put("references_words_" + hintcount + "_word", word);
|
||||
prop.put("references_words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
|
||||
prop.put("references_words_" + hintcount + "_count", theQuery.displayResults());
|
||||
prop.put("references_words_" + hintcount + "_offset", 0);
|
||||
prop.put("references_words_" + hintcount + "_resource", theQuery.searchdom());
|
||||
prop.put("references_words_" + hintcount + "_time", (theQuery.maximumTime / 1000));
|
||||
}
|
||||
prop.put("references_words", hintcount);
|
||||
if (hintcount++ > MAX_TOPWORDS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
prop.put("references", 0);
|
||||
}
|
||||
|
||||
return prop;
|
||||
}
|
||||
|
||||
// no bottomline
|
||||
prop.put("references", 0);
|
||||
|
||||
// generate result object
|
||||
int item = post.getInt("item", -1);
|
||||
prop.put("items", (item < 0) ? theQuery.displayResults() : item + 1);
|
||||
plasmaSearchEvent.ResultEntry result = theSearch.oneResult(item);
|
||||
|
||||
if (result == null) {
|
||||
prop.put("content", 0); // no content
|
||||
|
@ -99,7 +166,7 @@ public class yacysearchitem {
|
|||
prop.put("content_faviconCode", sb.licensedURLs.aquireLicense(faviconURL)); // aquire license for favicon url loading
|
||||
prop.put("content_urlhash", result.hash());
|
||||
prop.put("content_urlhexhash", yacySeed.b64Hash2hexHash(result.hash()));
|
||||
prop.put("content_urlname", nxTools.shortenURLString(result.urlname(), 120));
|
||||
prop.put("content_urlname", nxTools.shortenURLString(result.urlname(), urllength));
|
||||
prop.put("content_date", plasmaSwitchboard.dateString(result.modified()));
|
||||
prop.put("content_ybr", plasmaSearchPreOrder.ybr(result.hash()));
|
||||
prop.put("content_size", Long.toString(result.filesize()));
|
||||
|
@ -128,19 +195,49 @@ public class yacysearchitem {
|
|||
for (int i = 0; i < images.size(); i++) {
|
||||
ms = (plasmaSnippetCache.MediaSnippet) images.get(i);
|
||||
try {url = new yacyURL(ms.href, null);} catch (MalformedURLException e) {continue;}
|
||||
prop.put("content_images_" + i + "_href", ms.href);
|
||||
prop.put("content_images_" + i + "_code", sb.licensedURLs.aquireLicense(url));
|
||||
prop.put("content_images_" + i + "_name", ms.name);
|
||||
prop.put("content_images_" + i + "_attr", ms.attr); // attributes, here: original size of image
|
||||
prop.put("content_items_" + i + "_href", ms.href);
|
||||
prop.put("content_items_" + i + "_code", sb.licensedURLs.aquireLicense(url));
|
||||
prop.put("content_items_" + i + "_name", shorten(ms.name, namelength));
|
||||
prop.put("content_items_" + i + "_attr", ms.attr); // attributes, here: original size of image
|
||||
c++;
|
||||
}
|
||||
prop.put("content_images", c);
|
||||
prop.put("content_items", c);
|
||||
} else {
|
||||
prop.put("content_images", 0);
|
||||
prop.put("content_items", 0);
|
||||
}
|
||||
}
|
||||
|
||||
if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ||
|
||||
(theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ||
|
||||
(theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_APP)) {
|
||||
// any other media content
|
||||
ArrayList /* of plasmaSnippetCache.MediaSnippet */ media = result.mediaSnippets();
|
||||
if (item == 0) col = true;
|
||||
if (media != null) {
|
||||
plasmaSnippetCache.MediaSnippet ms;
|
||||
int c = 0;
|
||||
for (int i = 0; i < media.size(); i++) {
|
||||
ms = (plasmaSnippetCache.MediaSnippet) media.get(i);
|
||||
prop.put("content_items_" + i + "_href", ms.href);
|
||||
prop.put("content_items_" + i + "_hrefshort", nxTools.shortenURLString(ms.href, urllength));
|
||||
prop.put("content_items_" + i + "_name", shorten(ms.name, namelength));
|
||||
prop.put("content_items_" + i + "_col", (col) ? 0 : 1);
|
||||
c++;
|
||||
col = !col;
|
||||
}
|
||||
prop.put("content_items", c);
|
||||
} else {
|
||||
prop.put("content_items", 0);
|
||||
}
|
||||
}
|
||||
|
||||
return prop;
|
||||
}
|
||||
|
||||
private static String shorten(String s, int length) {
|
||||
if (s.length() <= length) return s;
|
||||
int p = s.lastIndexOf('.');
|
||||
if (p < 0) return s.substring(0, length - 3) + "...";
|
||||
return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ public class plasmaCrawlNURL {
|
|||
public static final int STACK_TYPE_MOVIE = 12; // put on movie stack
|
||||
public static final int STACK_TYPE_MUSIC = 13; // put on music stack
|
||||
|
||||
private static final long minimumLocalDelta = 100; // the minimum time difference between access of the same local domain
|
||||
private static final long minimumLocalDelta = 50; // the minimum time difference between access of the same local domain
|
||||
private static final long minimumGlobalDelta = 500; // the minimum time difference between access of the same global domain
|
||||
private static final long maximumDomAge = 60000; // the maximum age of a domain until it is used for another crawl attempt
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ import java.util.Date;
|
|||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
@ -64,7 +65,7 @@ public final class plasmaSearchEvent {
|
|||
private plasmaSearchProcessing process;
|
||||
private yacySearch[] primarySearchThreads, secondarySearchThreads;
|
||||
private TreeMap preselectedPeerHashes;
|
||||
private Object[] references;
|
||||
//private Object[] references;
|
||||
public TreeMap IAResults, IACount;
|
||||
public String IAmaxcounthash, IAneardhthash;
|
||||
private int localcount;
|
||||
|
@ -89,7 +90,6 @@ public final class plasmaSearchEvent {
|
|||
this.primarySearchThreads = null;
|
||||
this.secondarySearchThreads = null;
|
||||
this.preselectedPeerHashes = preselectedPeerHashes;
|
||||
this.references = new String[0];
|
||||
this.IAResults = new TreeMap();
|
||||
this.IACount = new TreeMap();
|
||||
this.IAmaxcounthash = null;
|
||||
|
@ -250,8 +250,14 @@ public final class plasmaSearchEvent {
|
|||
// fetch next entry to work on
|
||||
indexContainer c = rankedCache.container();
|
||||
indexRWIEntry entry = new indexRWIEntry(c.get(rankedIndex++));
|
||||
|
||||
ResultEntry resultEntry = obtainResultEntry(entry, false);
|
||||
indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry);
|
||||
|
||||
if (page == null) {
|
||||
registerFailure(entry.urlHash(), "url does not exist in lurl-db");
|
||||
continue;
|
||||
}
|
||||
|
||||
ResultEntry resultEntry = obtainResultEntry(page, false);
|
||||
if (resultEntry == null) continue; // the entry had some problems, cannot be used
|
||||
|
||||
// place the result to the result vector
|
||||
|
@ -267,18 +273,34 @@ public final class plasmaSearchEvent {
|
|||
process.yield("offline snippet fetch", resultList.size());
|
||||
}
|
||||
|
||||
// remove old events in the event cache
|
||||
Iterator i = lastEvents.entrySet().iterator();
|
||||
while (i.hasNext()) {
|
||||
if (((plasmaSearchEvent) ((Map.Entry) i.next()).getValue()).eventTime + eventLifetime < System.currentTimeMillis()) i.remove();
|
||||
}
|
||||
// clean up events
|
||||
cleanupEvents();
|
||||
|
||||
// store this search to a cache so it can be re-used
|
||||
lastEvents.put(query.id(), this);
|
||||
lastEventID = query.id();
|
||||
}
|
||||
|
||||
private ResultEntry obtainResultEntry(indexRWIEntry entry, boolean fetchSnippetOnline) {
|
||||
private static void cleanupEvents() {
|
||||
// remove old events in the event cache
|
||||
Iterator i = lastEvents.entrySet().iterator();
|
||||
plasmaSearchEvent cleanEvent;
|
||||
while (i.hasNext()) {
|
||||
cleanEvent = (plasmaSearchEvent) ((Map.Entry) i.next()).getValue();
|
||||
if (cleanEvent.eventTime + eventLifetime < System.currentTimeMillis()) {
|
||||
// execute deletion of failed words
|
||||
Set removeWords = cleanEvent.query.queryHashes;
|
||||
removeWords.addAll(cleanEvent.query.excludeHashes);
|
||||
cleanEvent.wordIndex.removeEntriesMultiple(removeWords, cleanEvent.failedURLs.keySet());
|
||||
serverLog.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id() + ", removed " + cleanEvent.failedURLs.size() + " URL references on " + removeWords.size() + " words");
|
||||
|
||||
// remove the event
|
||||
i.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ResultEntry obtainResultEntry(indexURLEntry page, boolean fetchSnippetOnline) {
|
||||
|
||||
// a search result entry needs some work to produce a result Entry:
|
||||
// - check if url entry exists in LURL-db
|
||||
|
@ -288,39 +310,24 @@ public final class plasmaSearchEvent {
|
|||
// load only urls if there was not yet a root url of that hash
|
||||
// find the url entry
|
||||
|
||||
indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry);
|
||||
|
||||
if (page == null) {
|
||||
registerFailure(entry.urlHash(), "url does not exist in lurl-db");
|
||||
return null;
|
||||
}
|
||||
|
||||
indexURLEntry.Components comp = page.comp();
|
||||
String pagetitle = comp.title().toLowerCase();
|
||||
if (comp.url() == null) {
|
||||
registerFailure(entry.urlHash(), "url corrupted (null)");
|
||||
registerFailure(page.hash(), "url corrupted (null)");
|
||||
return null; // rare case where the url is corrupted
|
||||
}
|
||||
String pageurl = comp.url().toString().toLowerCase();
|
||||
String pageauthor = comp.author().toLowerCase();
|
||||
|
||||
// check exclusion
|
||||
if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) {
|
||||
registerFailure(entry.urlHash(), "query-exclusion matches title: " + pagetitle);
|
||||
return null;
|
||||
}
|
||||
if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) {
|
||||
registerFailure(entry.urlHash(), "query-exclusion matches title: " + pagetitle);
|
||||
return null;
|
||||
}
|
||||
if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) {
|
||||
registerFailure(entry.urlHash(), "query-exclusion matches title: " + pagetitle);
|
||||
if ((plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) ||
|
||||
(plasmaSearchQuery.matches(pageurl, query.excludeHashes)) ||
|
||||
(plasmaSearchQuery.matches(pageauthor, query.excludeHashes))) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// check url mask
|
||||
if (!(pageurl.matches(query.urlMask))) {
|
||||
registerFailure(entry.urlHash(), "url-exclusion matches urlMask: " + pageurl);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -330,24 +337,24 @@ public final class plasmaSearchEvent {
|
|||
(!(comp.title().startsWith("Index of")))) {
|
||||
final Iterator wi = query.queryHashes.iterator();
|
||||
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
|
||||
registerFailure(entry.urlHash(), "index-of constrained not fullfilled");
|
||||
registerFailure(page.hash(), "index-of constrained not fullfilled");
|
||||
return null;
|
||||
}
|
||||
|
||||
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() == 0)) {
|
||||
registerFailure(entry.urlHash(), "contentdom-audio constrained not fullfilled");
|
||||
registerFailure(page.hash(), "contentdom-audio constrained not fullfilled");
|
||||
return null;
|
||||
}
|
||||
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() == 0)) {
|
||||
registerFailure(entry.urlHash(), "contentdom-video constrained not fullfilled");
|
||||
registerFailure(page.hash(), "contentdom-video constrained not fullfilled");
|
||||
return null;
|
||||
}
|
||||
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() == 0)) {
|
||||
registerFailure(entry.urlHash(), "contentdom-image constrained not fullfilled");
|
||||
registerFailure(page.hash(), "contentdom-image constrained not fullfilled");
|
||||
return null;
|
||||
}
|
||||
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() == 0)) {
|
||||
registerFailure(entry.urlHash(), "contentdom-app constrained not fullfilled");
|
||||
registerFailure(page.hash(), "contentdom-app constrained not fullfilled");
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -364,7 +371,7 @@ public final class plasmaSearchEvent {
|
|||
return new ResultEntry(page, wordIndex, null, null); // result without snippet
|
||||
} else {
|
||||
// problems with snippet fetch
|
||||
registerFailure(entry.urlHash(), "no text snippet for URL " + comp.url());
|
||||
registerFailure(page.hash(), "no text snippet for URL " + comp.url());
|
||||
plasmaSnippetCache.failConsequences(snippet, query.id());
|
||||
return null;
|
||||
}
|
||||
|
@ -378,7 +385,7 @@ public final class plasmaSearchEvent {
|
|||
return new ResultEntry(page, wordIndex, null, null);
|
||||
} else {
|
||||
// problems with snippet fetch
|
||||
registerFailure(entry.urlHash(), "no media snippet for URL " + comp.url());
|
||||
registerFailure(page.hash(), "no media snippet for URL " + comp.url());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -492,18 +499,24 @@ public final class plasmaSearchEvent {
|
|||
while ((resultList.size() < query.neededResults() + query.displayResults()) && (System.currentTimeMillis() < this.timeout)) {
|
||||
|
||||
// try secondary search
|
||||
prepareSecondarySearch();
|
||||
prepareSecondarySearch(); // will be executed only once
|
||||
|
||||
// fetch next entry to work on
|
||||
this.entry = null;
|
||||
entry = nextOrder();
|
||||
if (entry == null) {
|
||||
// wait and try again
|
||||
try {Thread.sleep(200);} catch (InterruptedException e) {}
|
||||
try {Thread.sleep(100);} catch (InterruptedException e) {}
|
||||
continue;
|
||||
}
|
||||
|
||||
ResultEntry resultEntry = obtainResultEntry(entry, true);
|
||||
|
||||
indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry);
|
||||
if (page == null) {
|
||||
registerFailure(entry.urlHash(), "url does not exist in lurl-db");
|
||||
continue;
|
||||
}
|
||||
|
||||
ResultEntry resultEntry = obtainResultEntry(page, true);
|
||||
if (resultEntry == null) continue; // the entry had some problems, cannot be used
|
||||
|
||||
// place the result to the result vector
|
||||
|
@ -730,8 +743,8 @@ public final class plasmaSearchEvent {
|
|||
//assert e != null;
|
||||
}
|
||||
|
||||
public Object[] references() {
|
||||
return this.references;
|
||||
/**
 * Returns reference words for this search event by delegating to
 * <code>rankedCache.getReferences(count)</code>.
 *
 * NOTE(review): callers in this commit cast the elements to String and use
 * them as topword hints; count is presumably the maximum number of words
 * returned - confirm against getReferences.
 *
 * @param count passed through unchanged to the ranked cache
 * @return the array produced by the ranked cache
 */
public Object[] references(int count) {
|
||||
return this.rankedCache.getReferences(count);
|
||||
}
|
||||
|
||||
public static class ResultEntry {
|
||||
|
|
Loading…
Reference in New Issue
Block a user