- fixed/re-implemented media search

- fixed search tips (topwords, now appearing at the bottom of the page)
- added search consequences execution (deletion of bad references some time after the search happened)
- added some formatting at network table

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4078 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2007-09-07 11:45:38 +00:00
parent 6c3bcadc1c
commit 8ff5e2c283
8 changed files with 203 additions and 131 deletions

View File

@ -51,14 +51,14 @@
<tr class="TableHeader" valign="bottom">
<td>send&nbsp;<strong>M</strong>essage/<br />show&nbsp;<strong>P</strong>rofile/<br />edit&nbsp;<strong>W</strong>iki<br />&nbsp;</td>
<td><strong>Name</strong><br />&nbsp;</td>
<td><strong>Info</strong><br />&nbsp;</td>
<td width="44"><strong>Info</strong><br />&nbsp;</td>
<td><strong>Release/<br />SVN</strong><br />&nbsp;</td>
<td><strong>PPM</strong><br />&nbsp;</td>
<td><strong>QPH</strong><br />&nbsp;</td>
<td><strong>Last<br />Seen</strong><br />&nbsp;&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=LastSeen&amp;order=up">&lt;</a>&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=LastSeen&amp;order=down">&gt;</a></td>
<td><strong>UTC</strong><br />Offset<br />&nbsp;</td>
<td><strong>Location</strong><br /><br />&nbsp;</td>
<td><strong>Uptime</strong><br />&nbsp;&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=Uptime&amp;order=up">&lt;</a>&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=Uptime&amp;order=down">&gt;</a></td>
<td width="70"><strong>Uptime</strong><br />&nbsp;&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=Uptime&amp;order=up">&lt;</a>&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=Uptime&amp;order=down">&gt;</a></td>
<td><strong>Links</strong><br />&nbsp;&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=LCount&amp;order=up">&lt;</a>&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=LCount&amp;order=down">&gt;</a></td>
<td><strong>RWIs</strong><br />&nbsp;&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=ICount&amp;order=up">&lt;</a>&nbsp;<a href="/Network.html?page=#[page]#&amp;sort=ICount&amp;order=down">&gt;</a></td>
<td><strong>URLs for<br />Remote<br />Crawl</strong><br />&nbsp;</td>

View File

@ -89,6 +89,9 @@ public final class search {
// test:
// http://localhost:8080/yacy/search.html?query=4galTpdpDM5Q (search for linux)
// http://localhost:8080/yacy/search.html?query=gh8DKIhGKXws (search for book)
// http://localhost:8080/yacy/search.html?query=UEhMGfGv2vOE (search for kernel)
// http://localhost:8080/yacy/search.html?query=ZX-LjaYo74PP (search for help)
// http://localhost:8080/yacy/search.html?query=uDqIalxDfM2a (search for mail)
// http://localhost:8080/yacy/search.html?query=4galTpdpDM5Qgh8DKIhGKXws&abstracts=auto (search for linux and book, generate abstract automatically)
// http://localhost:8080/yacy/search.html?query=&abstracts=4galTpdpDM5Q (only abstracts for linux)
@ -223,7 +226,7 @@ public final class search {
// prepare reference hints
localProcess.startTimer();
Object[] ws = theSearch.references();
Object[] ws = theSearch.references(10);
StringBuffer refstr = new StringBuffer();
for (int j = 0; j < ws.length; j++)
refstr.append(",").append((String) ws[j]);

View File

@ -99,10 +99,6 @@ document.getElementById("Enter").value = "search again - catch up more links";
::
<p>No Results. (length of search words must be at least 3 characters)</p>
::
<p>No Results. Please repeat your search to see if there are late-responses from remote peers.</p>
::
<p>No Results. &quot;<strong>#[wrong_regex]#</strong>&quot; is no valid regular expression. Please go back to the previous page and make sure to enter a valid regular expressions for URL mask and Prefer mask.</p>
::
<p><strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[linkcount]#</strong> results from a total number of <strong id="totalcount">#[totalcount]#</strong> known#(globalresults)#.::, <strong id="globalcount">#[globalcount]#</strong> links from other YaCy peers.#(/globalresults)#<div id="results"></div></p>
<p>Search Result Pages: <span id="pagenav">#[pagenav]#</span></p>
::
@ -111,19 +107,23 @@ document.getElementById("Enter").value = "search again - catch up more links";
<!-- the search results -->
#(combine)#::
<p><strong>Refine with</strong>:</p>
<p>#{words}#<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;time=#[time]#">#[word]#</a>#{/words}#</p>
<p><strong>Topwords</strong>:
#{words}#&nbsp;<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;time=#[time]#">#[word]#</a>#{/words}#
</p>
#(/combine)#
<script type="text/javascript">
var progressbar = new Progressbar(#[results]#, document.getElementById("results"));
</script>
<!-- linklist begin -->
<div id="hidden_results"></div>
#(resultTable)#::<table width="100%"><tr class="TableHeader"><td width="30%">Media</td><td width="70%">URL</td></tr>#(/resultTable)#
#{results}#
<!--#include virtual="yacysearchitem.html?item=#[item]#&eventID=#[eventID]#" -->
#{/results}#
#(resultTable)#::</table>#(/resultTable)#
<!-- linklist end -->
<!-- attach the bottomline -->
<!--#include virtual="yacysearchitem.html?bottomline=true&eventID=#[eventID]#" -->
</body>
</html>

View File

@ -49,14 +49,12 @@
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSearchEvent;
@ -78,8 +76,6 @@ import de.anomic.yacy.yacyURL;
public class yacysearch {
public static final int MAX_TOPWORDS = 24;
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
@ -142,7 +138,8 @@ public class yacysearch {
prop.put("excluded", 0);
prop.put("combine", 0);
prop.put("results", "");
prop.put("num-results", (searchAllowed) ? 0 : 6);
prop.put("resultTable", 0);
prop.put("num-results", (searchAllowed) ? 0 : 4);
return prop;
}
@ -286,7 +283,6 @@ public class yacysearch {
long timestamp = System.currentTimeMillis();
// create a new search event
String wrongregex = null;
if (plasmaSearchEvent.getEvent(theQuery.id()) == null) {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0;
@ -358,7 +354,9 @@ public class yacysearch {
prop.put("results_" + i + "_eventID", theQuery.id());
}
prop.put("results", theQuery.displayResults());
prop.put("resultTable", (contentdomCode <= 1) ? 0 : 1);
prop.put("eventID", theQuery.id()); // for bottomline
// process result of search
if (filtered.size() > 0) {
prop.put("excluded", 1);
@ -374,63 +372,7 @@ public class yacysearch {
prop.put("num-results", 1); // no results
}
} else {
final int totalcount = prop.getInt("num-results_totalcount", 0);
if (totalcount >= 10) {
final Object[] references = (Object[]) prop.get( "references", new String[0]);
prop.put("num-results", 5);
int hintcount = references.length;
if (hintcount > 0) {
prop.put("combine", 1);
// get the topwords
final TreeSet topwords = new TreeSet(kelondroNaturalOrder.naturalOrder);
String tmp = "";
for (int i = 0; i < hintcount; i++) {
tmp = (String) references[i];
if (tmp.matches("[a-z]+")) {
topwords.add(tmp);
}
}
// filter out the badwords
final TreeSet filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
if (filteredtopwords.size() > 0) {
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords);
}
// avoid stopwords being topwords
if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) {
if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) {
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords);
}
}
String word;
hintcount = 0;
final Iterator iter = topwords.iterator();
while (iter.hasNext()) {
word = (String) iter.next();
if (word != null) {
prop.put("combine_words_" + hintcount + "_word", word);
prop.put("combine_words_" + hintcount + "_newsearch", post.get("search", "").replace(' ', '+') + "+" + word);
prop.put("combine_words_" + hintcount + "_count", count);
prop.put("combine_words_" + hintcount + "_offset", offset);
prop.put("combine_words_" + hintcount + "_resource", ((global) ? "global" : "local"));
prop.put("combine_words_" + hintcount + "_time", (searchtime / 1000));
}
prop.put("combine_words", hintcount);
if (hintcount++ > MAX_TOPWORDS) {
break;
}
}
}
} else {
if (wrongregex != null) {
prop.put("num-results_wrong_regex", wrongregex);
prop.put("num-results", 4);
} else {
prop.put("num-results", 5);
}
}
prop.put("num-results", 3);
}
prop.put("input_cat", "href");

View File

@ -20,13 +20,30 @@
<p class="urlinfo">#[date]# | YBR-#[ybr]# | <a href="ViewFile.html?urlHash=#[urlhash]#&amp;words=#[words]#">Info</a> | <a href="yacysearch.html?cat=image&amp;url=#[url]#&amp;search=#[former]#">Pictures</a></p>
</div>
::
#{images}#
#{items}#
<div class="thumbcontainer">
<a href="#[href]#" class="thumblink"><img src="/ViewImage.png?maxwidth=96&maxheight=96&code=#[code]#" alt="#[name]#"></a>
<div class="TableCellDark"><a href="#[href]#">#[name]#</a></div>
</div>
#{/images}#
#{/items}#
::
#{items}#
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#">#[hrefshort]#</a></td></tr>
#{/items}#
::
#{items}#
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#">#[hrefshort]#</a></td></tr>
#{/items}#
::
#{items}#
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#">#[hrefshort]#</a></td></tr>
#{/items}#
#(/content)#
#(references)#::
<p><strong>Topwords</strong>:
#{words}#&nbsp;<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;time=#[time]#">#[word]#</a>#{/words}#
</p>
#(/references)#
<script type="text/javascript">
statistics("#[offset]#", "#[items]#", "#[global]#", "#[total]#");
progressbar.step(1);

View File

@ -28,9 +28,12 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery;
@ -49,12 +52,17 @@ import de.anomic.yacy.yacyURL;
public class yacysearchitem {
private static boolean col = true;
private static final int namelength = 60;
private static final int urllength = 120;
private static final int MAX_TOPWORDS = 24;
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
final serverObjects prop = new serverObjects();
String eventID = post.get("eventID", "");
int item = post.getInt("item", -1);
boolean bottomline = post.get("bottomline", "false").equals("true");
boolean authenticated = sb.adminAuthenticated(header) >= 2;
// find search event
@ -62,14 +70,73 @@ public class yacysearchitem {
plasmaSearchQuery theQuery = theSearch.getQuery();
plasmaSearchRankingProfile ranking = theSearch.getRanking();
// generate result object
plasmaSearchEvent.ResultEntry result = theSearch.oneResult(item);
// dynamically update count values
prop.put("offset", theQuery.neededResults() - theQuery.displayResults() + 1);
prop.put("items", item + 1);
prop.put("global", theSearch.getGlobalCount());
prop.put("total", theSearch.getGlobalCount() + theSearch.getLocalCount());
prop.put("items", theQuery.displayResults());
if (bottomline) {
// attach the bottom line with search references (topwords)
final Object[] references = theSearch.references(20);
int hintcount = references.length;
if (hintcount > 0) {
prop.put("references", 1);
// get the topwords
final TreeSet topwords = new TreeSet(kelondroNaturalOrder.naturalOrder);
String tmp = "";
for (int i = 0; i < hintcount; i++) {
tmp = (String) references[i];
if (tmp.matches("[a-z]+")) {
topwords.add(tmp);
}
}
// filter out the badwords
final TreeSet filteredtopwords = kelondroMSetTools.joinConstructive(topwords, plasmaSwitchboard.badwords);
if (filteredtopwords.size() > 0) {
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords);
}
// avoid stopwords being topwords
if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) {
if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) {
kelondroMSetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords);
}
}
String word;
hintcount = 0;
final Iterator iter = topwords.iterator();
while (iter.hasNext()) {
word = (String) iter.next();
if (word != null) {
prop.put("references_words_" + hintcount + "_word", word);
prop.put("references_words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
prop.put("references_words_" + hintcount + "_count", theQuery.displayResults());
prop.put("references_words_" + hintcount + "_offset", 0);
prop.put("references_words_" + hintcount + "_resource", theQuery.searchdom());
prop.put("references_words_" + hintcount + "_time", (theQuery.maximumTime / 1000));
}
prop.put("references_words", hintcount);
if (hintcount++ > MAX_TOPWORDS) {
break;
}
}
} else {
prop.put("references", 0);
}
return prop;
}
// no bottomline
prop.put("references", 0);
// generate result object
int item = post.getInt("item", -1);
prop.put("items", (item < 0) ? theQuery.displayResults() : item + 1);
plasmaSearchEvent.ResultEntry result = theSearch.oneResult(item);
if (result == null) {
prop.put("content", 0); // no content
@ -99,7 +166,7 @@ public class yacysearchitem {
prop.put("content_faviconCode", sb.licensedURLs.aquireLicense(faviconURL)); // aquire license for favicon url loading
prop.put("content_urlhash", result.hash());
prop.put("content_urlhexhash", yacySeed.b64Hash2hexHash(result.hash()));
prop.put("content_urlname", nxTools.shortenURLString(result.urlname(), 120));
prop.put("content_urlname", nxTools.shortenURLString(result.urlname(), urllength));
prop.put("content_date", plasmaSwitchboard.dateString(result.modified()));
prop.put("content_ybr", plasmaSearchPreOrder.ybr(result.hash()));
prop.put("content_size", Long.toString(result.filesize()));
@ -128,19 +195,49 @@ public class yacysearchitem {
for (int i = 0; i < images.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) images.get(i);
try {url = new yacyURL(ms.href, null);} catch (MalformedURLException e) {continue;}
prop.put("content_images_" + i + "_href", ms.href);
prop.put("content_images_" + i + "_code", sb.licensedURLs.aquireLicense(url));
prop.put("content_images_" + i + "_name", ms.name);
prop.put("content_images_" + i + "_attr", ms.attr); // attributes, here: original size of image
prop.put("content_items_" + i + "_href", ms.href);
prop.put("content_items_" + i + "_code", sb.licensedURLs.aquireLicense(url));
prop.put("content_items_" + i + "_name", shorten(ms.name, namelength));
prop.put("content_items_" + i + "_attr", ms.attr); // attributes, here: original size of image
c++;
}
prop.put("content_images", c);
prop.put("content_items", c);
} else {
prop.put("content_images", 0);
prop.put("content_items", 0);
}
}
if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ||
(theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ||
(theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_APP)) {
// any other media content
ArrayList /* of plasmaSnippetCache.MediaSnippet */ media = result.mediaSnippets();
if (item == 0) col = true;
if (media != null) {
plasmaSnippetCache.MediaSnippet ms;
int c = 0;
for (int i = 0; i < media.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) media.get(i);
prop.put("content_items_" + i + "_href", ms.href);
prop.put("content_items_" + i + "_hrefshort", nxTools.shortenURLString(ms.href, urllength));
prop.put("content_items_" + i + "_name", shorten(ms.name, namelength));
prop.put("content_items_" + i + "_col", (col) ? 0 : 1);
c++;
col = !col;
}
prop.put("content_items", c);
} else {
prop.put("content_items", 0);
}
}
return prop;
}
/**
 * Shortens a display string to at most <code>length</code> characters.
 *
 * If the string already fits it is returned unchanged. Otherwise the middle
 * part is replaced by "..." and, where possible, the suffix starting at the
 * last '.' (typically a file extension) is preserved, e.g.
 * <code>shorten("abcdefghijklmnop.txt", 12)</code> yields <code>"abcde....txt"</code>.
 *
 * When the suffix after the last '.' is too long to be kept within the
 * budget (for instance a sentence that merely contains a full stop), or when
 * there is no '.', the string is cut at <code>length - 3</code> and "..."
 * is appended. Budgets smaller than the ellipsis itself result in a hard cut.
 *
 * @param s      the string to shorten; <code>null</code> is treated as empty
 * @param length the maximum number of characters of the result
 * @return the shortened string, never <code>null</code> and never longer
 *         than <code>max(0, length)</code> unless <code>s</code> already fits
 */
private static String shorten(String s, int length) {
    if (s == null) return "";
    if (s.length() <= length) return s;

    // not even room for the ellipsis: hard cut instead of a negative substring index
    if (length < 3) return s.substring(0, Math.max(0, length));

    final int p = s.lastIndexOf('.');
    // number of leading characters that fit in front of "..." + suffix;
    // negative if there is no suffix or the suffix alone exceeds the budget
    final int head = (p < 0) ? -1 : length - (s.length() - p) - 3;
    if (head < 0) {
        // suffix cannot be preserved: plain cut with trailing ellipsis
        return s.substring(0, length - 3) + "...";
    }
    return s.substring(0, head) + "..." + s.substring(p);
}
}

View File

@ -60,7 +60,7 @@ public class plasmaCrawlNURL {
public static final int STACK_TYPE_MOVIE = 12; // put on movie stack
public static final int STACK_TYPE_MUSIC = 13; // put on music stack
private static final long minimumLocalDelta = 100; // the minimum time difference between access of the same local domain
private static final long minimumLocalDelta = 50; // the minimum time difference between access of the same local domain
private static final long minimumGlobalDelta = 500; // the minimum time difference between access of the same global domain
private static final long maximumDomAge = 60000; // the maximum age of a domain until it is used for another crawl attempt

View File

@ -32,6 +32,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
@ -64,7 +65,7 @@ public final class plasmaSearchEvent {
private plasmaSearchProcessing process;
private yacySearch[] primarySearchThreads, secondarySearchThreads;
private TreeMap preselectedPeerHashes;
private Object[] references;
//private Object[] references;
public TreeMap IAResults, IACount;
public String IAmaxcounthash, IAneardhthash;
private int localcount;
@ -89,7 +90,6 @@ public final class plasmaSearchEvent {
this.primarySearchThreads = null;
this.secondarySearchThreads = null;
this.preselectedPeerHashes = preselectedPeerHashes;
this.references = new String[0];
this.IAResults = new TreeMap();
this.IACount = new TreeMap();
this.IAmaxcounthash = null;
@ -250,8 +250,14 @@ public final class plasmaSearchEvent {
// fetch next entry to work on
indexContainer c = rankedCache.container();
indexRWIEntry entry = new indexRWIEntry(c.get(rankedIndex++));
ResultEntry resultEntry = obtainResultEntry(entry, false);
indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry);
if (page == null) {
registerFailure(entry.urlHash(), "url does not exist in lurl-db");
continue;
}
ResultEntry resultEntry = obtainResultEntry(page, false);
if (resultEntry == null) continue; // the entry had some problems, cannot be used
// place the result to the result vector
@ -267,18 +273,34 @@ public final class plasmaSearchEvent {
process.yield("offline snippet fetch", resultList.size());
}
// remove old events in the event cache
Iterator i = lastEvents.entrySet().iterator();
while (i.hasNext()) {
if (((plasmaSearchEvent) ((Map.Entry) i.next()).getValue()).eventTime + eventLifetime < System.currentTimeMillis()) i.remove();
}
// clean up events
cleanupEvents();
// store this search to a cache so it can be re-used
lastEvents.put(query.id(), this);
lastEventID = query.id();
}
private ResultEntry obtainResultEntry(indexRWIEntry entry, boolean fetchSnippetOnline) {
/**
 * Evicts expired search events from the event cache.
 *
 * An event counts as expired once its eventTime plus eventLifetime lies
 * before the current time. For every expired event the URL references that
 * were registered as failed during the search are removed from the word
 * index for all query and exclusion word hashes, the clean-up is logged,
 * and the event is dropped from the cache.
 */
private static void cleanupEvents() {
    for (Iterator cacheWalker = lastEvents.entrySet().iterator(); cacheWalker.hasNext();) {
        final Map.Entry cached = (Map.Entry) cacheWalker.next();
        final plasmaSearchEvent expired = (plasmaSearchEvent) cached.getValue();

        // events that are still within their lifetime stay in the cache
        if (expired.eventTime + eventLifetime >= System.currentTimeMillis()) continue;

        // collect all word hashes of the query (searched and excluded words)
        // NOTE(review): this adds the exclude hashes directly into the query's
        // own queryHashes set instead of a copy - confirm nothing else still
        // reads that set once the event has expired
        final Set wordHashes = expired.query.queryHashes;
        wordHashes.addAll(expired.query.excludeHashes);

        // execute deletion of the failed URL references for those words
        expired.wordIndex.removeEntriesMultiple(wordHashes, expired.failedURLs.keySet());
        serverLog.logInfo("SearchEvents", "cleaning up event " + expired.query.id() + ", removed " + expired.failedURLs.size() + " URL references on " + wordHashes.size() + " words");

        // finally drop the event itself
        cacheWalker.remove();
    }
}
private ResultEntry obtainResultEntry(indexURLEntry page, boolean fetchSnippetOnline) {
// a search result entry needs some work to produce a result Entry:
// - check if url entry exists in LURL-db
@ -288,39 +310,24 @@ public final class plasmaSearchEvent {
// load only urls if there was not yet a root url of that hash
// find the url entry
indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry);
if (page == null) {
registerFailure(entry.urlHash(), "url does not exist in lurl-db");
return null;
}
indexURLEntry.Components comp = page.comp();
String pagetitle = comp.title().toLowerCase();
if (comp.url() == null) {
registerFailure(entry.urlHash(), "url corrupted (null)");
registerFailure(page.hash(), "url corrupted (null)");
return null; // rare case where the url is corrupted
}
String pageurl = comp.url().toString().toLowerCase();
String pageauthor = comp.author().toLowerCase();
// check exclusion
if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) {
registerFailure(entry.urlHash(), "query-exclusion matches title: " + pagetitle);
return null;
}
if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) {
registerFailure(entry.urlHash(), "query-exclusion matches title: " + pagetitle);
return null;
}
if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) {
registerFailure(entry.urlHash(), "query-exclusion matches title: " + pagetitle);
if ((plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) ||
(plasmaSearchQuery.matches(pageurl, query.excludeHashes)) ||
(plasmaSearchQuery.matches(pageauthor, query.excludeHashes))) {
return null;
}
// check url mask
if (!(pageurl.matches(query.urlMask))) {
registerFailure(entry.urlHash(), "url-exclusion matches urlMask: " + pageurl);
return null;
}
@ -330,24 +337,24 @@ public final class plasmaSearchEvent {
(!(comp.title().startsWith("Index of")))) {
final Iterator wi = query.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
registerFailure(entry.urlHash(), "index-of constrained not fullfilled");
registerFailure(page.hash(), "index-of constrained not fullfilled");
return null;
}
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() == 0)) {
registerFailure(entry.urlHash(), "contentdom-audio constrained not fullfilled");
registerFailure(page.hash(), "contentdom-audio constrained not fullfilled");
return null;
}
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() == 0)) {
registerFailure(entry.urlHash(), "contentdom-video constrained not fullfilled");
registerFailure(page.hash(), "contentdom-video constrained not fullfilled");
return null;
}
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() == 0)) {
registerFailure(entry.urlHash(), "contentdom-image constrained not fullfilled");
registerFailure(page.hash(), "contentdom-image constrained not fullfilled");
return null;
}
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() == 0)) {
registerFailure(entry.urlHash(), "contentdom-app constrained not fullfilled");
registerFailure(page.hash(), "contentdom-app constrained not fullfilled");
return null;
}
@ -364,7 +371,7 @@ public final class plasmaSearchEvent {
return new ResultEntry(page, wordIndex, null, null); // result without snippet
} else {
// problems with snippet fetch
registerFailure(entry.urlHash(), "no text snippet for URL " + comp.url());
registerFailure(page.hash(), "no text snippet for URL " + comp.url());
plasmaSnippetCache.failConsequences(snippet, query.id());
return null;
}
@ -378,7 +385,7 @@ public final class plasmaSearchEvent {
return new ResultEntry(page, wordIndex, null, null);
} else {
// problems with snippet fetch
registerFailure(entry.urlHash(), "no media snippet for URL " + comp.url());
registerFailure(page.hash(), "no media snippet for URL " + comp.url());
return null;
}
}
@ -492,18 +499,24 @@ public final class plasmaSearchEvent {
while ((resultList.size() < query.neededResults() + query.displayResults()) && (System.currentTimeMillis() < this.timeout)) {
// try secondary search
prepareSecondarySearch();
prepareSecondarySearch(); // will be executed only once
// fetch next entry to work on
this.entry = null;
entry = nextOrder();
if (entry == null) {
// wait and try again
try {Thread.sleep(200);} catch (InterruptedException e) {}
try {Thread.sleep(100);} catch (InterruptedException e) {}
continue;
}
ResultEntry resultEntry = obtainResultEntry(entry, true);
indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry);
if (page == null) {
registerFailure(entry.urlHash(), "url does not exist in lurl-db");
continue;
}
ResultEntry resultEntry = obtainResultEntry(page, true);
if (resultEntry == null) continue; // the entry had some problems, cannot be used
// place the result to the result vector
@ -730,8 +743,8 @@ public final class plasmaSearchEvent {
//assert e != null;
}
public Object[] references() {
return this.references;
public Object[] references(int count) {
return this.rankedCache.getReferences(count);
}
public static class ResultEntry {