- completed the author navigation

- removed some unused variables

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6037 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-06-08 23:30:12 +00:00
parent a9a8b8d161
commit 27fa6a66ad
15 changed files with 95 additions and 49 deletions

View File

@ -90,6 +90,7 @@ public final class search {
final String contentdom = post.get("contentdom", "text");
final String filter = post.get("filter", ".*");
String sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null;
String authorhash = post.get("authorhash", ""); if (authorhash.length() == 0) authorhash = null;
String language = post.get("language", "");
if (!iso639.exists(language)) {
// take language from the user agent
@ -204,6 +205,7 @@ public final class search {
null,
false,
sitehash,
authorhash,
yacyURL.TLD_any_zone_filter,
client,
false);
@ -255,7 +257,8 @@ public final class search {
-1,
constraint,
false,
sitehash,
sitehash,
authorhash,
yacyURL.TLD_any_zone_filter,
client,
false);

View File

@ -282,6 +282,28 @@ public class yacysearch {
while(domain.endsWith(".")) domain = domain.substring(0, domain.length() - 1);
sitehash = yacyURL.domhash(domain);
}
// Parse the optional "author:" query modifier. The author name may be wrapped in
// single quotes (author:'John Doe') so that it can contain spaces; without quotes
// the name ends at the next space. The modifier is removed from the query string
// and the hash of the author name is kept in authorhash (null = no author filter).
int authori = querystring.indexOf("author:");
String authorhash = null;
if (authori >= 0) {
    // index of the first character after "author:"; may be equal to the query length
    final int valueStart = authori + 7;
    // check if the author was given with single quotes or without;
    // the bounds check avoids an exception when the query ends with "author:"
    final boolean quotes = valueStart < querystring.length() && querystring.charAt(valueStart) == '\'';
    final String author;
    final int modifierEnd; // exclusive end of the complete modifier inside the query string
    if (quotes) {
        final int closing = querystring.indexOf('\'', valueStart + 1);
        if (closing < 0) {
            // unterminated quote: take everything up to the end of the query as author name
            author = querystring.substring(valueStart + 1);
            modifierEnd = querystring.length();
        } else {
            author = querystring.substring(valueStart + 1, closing);
            modifierEnd = closing + 1;
        }
    } else {
        final int space = querystring.indexOf(' ', valueStart);
        modifierEnd = (space < 0) ? querystring.length() : space;
        author = querystring.substring(valueStart, modifierEnd);
    }
    // remove the modifier text from the query so that it is not treated as a search word
    querystring = querystring.replace(querystring.substring(authori, modifierEnd), "");
    // an empty name (e.g. a bare "author:") must not become a filter
    if (author.length() > 0) {
        authorhash = new String(Word.word2hash(author));
    }
}
int tld = querystring.indexOf("tld:");
if (tld >= 0) {
int ftb = querystring.indexOf(' ', tld);
@ -401,6 +423,7 @@ public class yacysearch {
constraint,
true,
sitehash,
authorhash,
yacyURL.TLD_any_zone_filter,
client,
authenticated);

View File

@ -4,12 +4,7 @@
<li>#[url]#</li>
#{/element}#</ul></div>
#(/nav-domains)#
#(nav-topics)#::
<h3 style="padding-left:25px;">Topics</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
#(/nav-topics)#
#(nav-authors)#::
<h3 style="padding-left:25px;">Authors</h3>
<div><ul style="padding-left: 0px;">#{element}#
@ -17,6 +12,13 @@
#{/element}#</ul></div>
#(/nav-authors)#
#(nav-topics)#::
<h3 style="padding-left:25px;">Topics</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
#(/nav-topics)#
<h3 style="padding-left:25px;">Timeline</h3>
<div>
<p>

View File

@ -115,11 +115,13 @@ public class yacysearchtrailer {
prop.put("nav-authors", 1);
NavigatorEntry entry;
int i;
String anav;
for (i = 0; i < authorNavigator.size(); i++) {
entry = authorNavigator.get(i);
anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name + "'";
prop.put("nav-authors_element_" + i + "_name", entry.name);
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + plasmaSearchQuery.navurl("html", 0, display, theQuery, theQuery.urlMask, "author:'" + entry.name + "'", theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-authors_element_" + i + "_url-json", plasmaSearchQuery.navurl("json", 0, display, theQuery, theQuery.urlMask, "author:'" + entry.name + "'", theQuery.navigators));
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + plasmaSearchQuery.navurl("html", 0, display, theQuery, theQuery.urlMask, anav, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-authors_element_" + i + "_url-json", plasmaSearchQuery.navurl("json", 0, display, theQuery, theQuery.urlMask, anav, theQuery.navigators));
prop.put("nav-authors_element_" + i + "_count", entry.count);
prop.put("nav-authors_element_" + i + "_modifier", "author:'" + entry.name + "'");
prop.put("nav-authors_element_" + i + "_nl", 1);

View File

@ -11,7 +11,20 @@
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-domains)##(nav-topics)#::
},#(/nav-domains)##(nav-authors)#::
{
"facetname": "authors",
"displayname": "Authors",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
}#(/nav-authors)##(nav-topics)#::
{
"facetname": "topwords",
"displayname": "Topics",

View File

@ -67,7 +67,7 @@ public class blogBoard {
new File(actpath.getParent()).mkdir();
new File(newFile.getParent()).mkdir();
if (database == null) {
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_');
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_');
}
}

View File

@ -70,7 +70,7 @@ public class blogBoardComments {
new File(actpath.getParent()).mkdir();
new File(newFile.getParent()).mkdir();
if (database == null) {
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_');
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_');
}
}

View File

@ -116,17 +116,17 @@ public class bookmarksDB {
tagCache=new TreeMap<String, Tag>();
bookmarksFile.getParentFile().mkdirs();
//this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false));
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, bookmarksFileNew), 1000, '_');
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, bookmarksFileNew), 1000, '_');
// tags
tagsFile.getParentFile().mkdirs();
final boolean tagsFileExisted = tagsFile.exists();
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, tagsFileNew), 500, '_');
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, tagsFileNew), 500, '_');
if (!tagsFileExisted) rebuildTags();
// dates
final boolean datesExisted = datesFile.exists();
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, false, false, datesFileNew), 500, '_');
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_');
if (!datesExisted) rebuildDates();
// autoReCrawl

View File

@ -55,7 +55,7 @@ public class messageBoard {
new File(path.getParent()).mkdir();
new File(pathNew.getParent()).mkdir();
if (database == null) {
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, false, false, pathNew), 500, '_');
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, pathNew), 500, '_');
}
sn = 0;
}

View File

@ -60,7 +60,7 @@ public final class userDB {
this.userTableFile = userTableFileNew;
userTableFile.getParentFile().mkdirs();
userTableFileNew.getParentFile().mkdirs();
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, false, false, userTableFile), 10, '_');
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, userTableFile), 10, '_');
}
void resetDatabase() {

View File

@ -57,11 +57,11 @@ public class wikiBoard {
final File bkppath, final File bkppathNew) throws IOException {
new File(actpath.getParent()).mkdirs();
if (datbase == null) {
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, actpathNew), 500, '_');
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, actpathNew), 500, '_');
}
new File(bkppath.getParent()).mkdirs();
if (bkpbase == null) {
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, false, false, bkppathNew), 500, '_');
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, bkppathNew), 500, '_');
}
}

View File

@ -70,7 +70,7 @@ public class BLOBTree {
* Deprecated class. Please use BLOBHeap instead.
*/
private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail) {
final int nodesize, final char fillChar, final ByteOrder objectOrder) {
// creates or opens a dynamic tree
rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder);
ObjectIndex fbi;
@ -78,17 +78,8 @@ public class BLOBTree {
fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8);
} catch (final IOException e) {
e.printStackTrace();
if (resetOnFail) {
FileUtils.deletedelete(file);
try {
fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8);
} catch (final IOException e1) {
e1.printStackTrace();
throw new kelondroException(e.getMessage());
}
} else {
throw new kelondroException(e.getMessage());
}
FileUtils.deletedelete(file);
throw new kelondroException(e.getMessage());
}
this.index = ((useObjectCache) && (!(fbi instanceof EcoTable))) ? (ObjectIndex) new Cache(fbi) : fbi;
this.keylen = key;
@ -100,13 +91,13 @@ public class BLOBTree {
}
public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException {
final int nodesize, final char fillChar, final ByteOrder objectOrder, final File blob) throws IOException {
if (blob.exists() || !file.exists()) {
// open the blob file and ignore the tree
return new BLOBHeap(blob, key, objectOrder, 1024 * 64);
}
// open a Tree and migrate everything to a Heap
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, writebuffer, resetOnFail);
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder);
BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64);
Iterator<byte[]> i = tree.keys(true, false);
byte[] k, kk = new byte[key], v;

View File

@ -144,7 +144,7 @@ public class plasmaRankingCRProcess {
return true;
}
private static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell<CitationReferenceRow> seq) throws IOException {
public static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell<CitationReferenceRow> seq) throws IOException {
// open file
AttrSeq source_cr = null;
try {

View File

@ -74,6 +74,7 @@ public final class plasmaSearchQuery {
public plasmaSearchRankingProfile ranking;
public String host; // this is the client host that starts the query, not a site operator
public String sitehash; // this is a domain hash, 6 bytes long or null
public String authorhash;
public yacySeed remotepeer;
public Long handle;
// values that are set after a search:
@ -113,6 +114,7 @@ public final class plasmaSearchQuery {
this.onlineSnippetFetch = false;
this.host = null;
this.sitehash = null;
this.authorhash = null;
this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = false;
@ -132,6 +134,7 @@ public final class plasmaSearchQuery {
final int domType, final String domGroupName, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint,
final String site,
final String authorhash,
final int domainzone,
final String host,
final boolean specialRights) {
@ -155,6 +158,7 @@ public final class plasmaSearchQuery {
this.constraint = constraint;
this.allofconstraint = allofconstraint;
this.sitehash = site; assert site == null || site.length() == 6;
this.authorhash = authorhash; assert authorhash == null || authorhash.length() > 0;
this.onlineSnippetFetch = onlineSnippetFetch;
this.host = host;
this.remotepeer = null;
@ -325,6 +329,7 @@ public final class plasmaSearchQuery {
"*" + this.prefer +
"*" + this.urlMask +
"*" + this.sitehash +
"*" + this.authorhash +
"*" + this.targetlang +
"*" + this.constraint +
"*" + this.maxDistance;

View File

@ -35,13 +35,11 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.kelondro.index.BinSearch;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.Digest;
import de.anomic.kelondro.text.Reference;
import de.anomic.kelondro.text.ReferenceContainer;
@ -78,9 +76,9 @@ public final class plasmaSearchRankingProcess {
private final Segment indexSegment;
private HashMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private final int[] domZones;
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
private final TreeMap<byte[], AuthorInfo> authorNavigator;
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
public plasmaSearchRankingProcess(
final Segment indexSegment,
@ -107,7 +105,7 @@ public final class plasmaSearchRankingProcess {
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new ConcurrentHashMap<String, HostInfo>();
this.authorNavigator = new TreeMap<byte[], AuthorInfo>(Base64Order.enhancedCoder);
this.authorNavigator = new ConcurrentHashMap<String, AuthorInfo>();
this.ref = new ConcurrentHashMap<String, Integer>();
this.domZones = new int[8];
for (int i = 0; i < 8; i++) {this.domZones[i] = 0;}
@ -330,16 +328,25 @@ public final class plasmaSearchRankingProcess {
// author navigation:
String author = metadata.dc_creator();
if (author != null && author.length() > 0) {
byte[] authorhash = Word.word2hash(author);
//synchronized (this.authorNavigator) {
AuthorInfo in = this.authorNavigator.get(authorhash);
if (in == null) {
this.authorNavigator.put(authorhash, new AuthorInfo(author));
} else {
in.inc();
this.authorNavigator.put(authorhash, in);
}
//}
// compute the hash of the author name; it is used as the key in the author navigator
String authorhash = new String(Word.word2hash(author));
System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author);
// if the query filters for an author, skip entries written by any other author
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
continue;
}
// add author to the author navigator
AuthorInfo in = this.authorNavigator.get(authorhash);
if (in == null) {
this.authorNavigator.put(authorhash, new AuthorInfo(author));
} else {
in.inc();
this.authorNavigator.put(authorhash, in);
}
} else if (this.query.authorhash != null) {
continue;
}
// get the url