mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- completed the author navigation
- removed some unused variables

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6037 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
a9a8b8d161
commit
27fa6a66ad
|
@ -90,6 +90,7 @@ public final class search {
|
|||
final String contentdom = post.get("contentdom", "text");
|
||||
final String filter = post.get("filter", ".*");
|
||||
String sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null;
|
||||
String authorhash = post.get("authorhash", ""); if (authorhash.length() == 0) authorhash = null;
|
||||
String language = post.get("language", "");
|
||||
if (!iso639.exists(language)) {
|
||||
// take language from the user agent
|
||||
|
@ -204,6 +205,7 @@ public final class search {
|
|||
null,
|
||||
false,
|
||||
sitehash,
|
||||
authorhash,
|
||||
yacyURL.TLD_any_zone_filter,
|
||||
client,
|
||||
false);
|
||||
|
@ -255,7 +257,8 @@ public final class search {
|
|||
-1,
|
||||
constraint,
|
||||
false,
|
||||
sitehash,
|
||||
sitehash,
|
||||
authorhash,
|
||||
yacyURL.TLD_any_zone_filter,
|
||||
client,
|
||||
false);
|
||||
|
|
|
@ -282,6 +282,28 @@ public class yacysearch {
|
|||
while(domain.endsWith(".")) domain = domain.substring(0, domain.length() - 1);
|
||||
sitehash = yacyURL.domhash(domain);
|
||||
}
|
||||
int authori = querystring.indexOf("author:");
|
||||
String authorhash = null;
|
||||
if (authori >= 0) {
|
||||
// check if the author was given with single quotes or without
|
||||
boolean quotes = false;
|
||||
if (querystring.charAt(authori + 7) == (char) 39) {
|
||||
quotes = true;
|
||||
}
|
||||
String author;
|
||||
if (quotes) {
|
||||
int ftb = querystring.indexOf((char) 39, authori + 8);
|
||||
if (ftb == -1) ftb = querystring.length() + 1;
|
||||
author = querystring.substring(authori + 8, ftb);
|
||||
querystring = querystring.replace("author:'" + author + "'", "");
|
||||
} else {
|
||||
int ftb = querystring.indexOf(' ', authori);
|
||||
if (ftb == -1) ftb = querystring.length();
|
||||
author = querystring.substring(authori + 7, ftb);
|
||||
querystring = querystring.replace("author:" + author, "");
|
||||
}
|
||||
authorhash = new String(Word.word2hash(author));
|
||||
}
|
||||
int tld = querystring.indexOf("tld:");
|
||||
if (tld >= 0) {
|
||||
int ftb = querystring.indexOf(' ', tld);
|
||||
|
@ -401,6 +423,7 @@ public class yacysearch {
|
|||
constraint,
|
||||
true,
|
||||
sitehash,
|
||||
authorhash,
|
||||
yacyURL.TLD_any_zone_filter,
|
||||
client,
|
||||
authenticated);
|
||||
|
|
|
@ -4,12 +4,7 @@
|
|||
<li>#[url]#</li>
|
||||
#{/element}#</ul></div>
|
||||
#(/nav-domains)#
|
||||
#(nav-topics)#::
|
||||
<h3 style="padding-left:25px;">Topics</h3>
|
||||
<div><ul style="padding-left: 0px;">#{element}#
|
||||
<li>#[url]#</li>
|
||||
#{/element}#</ul></div>
|
||||
#(/nav-topics)#
|
||||
|
||||
#(nav-authors)#::
|
||||
<h3 style="padding-left:25px;">Authors</h3>
|
||||
<div><ul style="padding-left: 0px;">#{element}#
|
||||
|
@ -17,6 +12,13 @@
|
|||
#{/element}#</ul></div>
|
||||
#(/nav-authors)#
|
||||
|
||||
#(nav-topics)#::
|
||||
<h3 style="padding-left:25px;">Topics</h3>
|
||||
<div><ul style="padding-left: 0px;">#{element}#
|
||||
<li>#[url]#</li>
|
||||
#{/element}#</ul></div>
|
||||
#(/nav-topics)#
|
||||
|
||||
<h3 style="padding-left:25px;">Timeline</h3>
|
||||
<div>
|
||||
<p>
|
||||
|
|
|
@ -115,11 +115,13 @@ public class yacysearchtrailer {
|
|||
prop.put("nav-authors", 1);
|
||||
NavigatorEntry entry;
|
||||
int i;
|
||||
String anav;
|
||||
for (i = 0; i < authorNavigator.size(); i++) {
|
||||
entry = authorNavigator.get(i);
|
||||
anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name + "'";
|
||||
prop.put("nav-authors_element_" + i + "_name", entry.name);
|
||||
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + plasmaSearchQuery.navurl("html", 0, display, theQuery, theQuery.urlMask, "author:'" + entry.name + "'", theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
|
||||
prop.putJSON("nav-authors_element_" + i + "_url-json", plasmaSearchQuery.navurl("json", 0, display, theQuery, theQuery.urlMask, "author:'" + entry.name + "'", theQuery.navigators));
|
||||
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + plasmaSearchQuery.navurl("html", 0, display, theQuery, theQuery.urlMask, anav, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
|
||||
prop.putJSON("nav-authors_element_" + i + "_url-json", plasmaSearchQuery.navurl("json", 0, display, theQuery, theQuery.urlMask, anav, theQuery.navigators));
|
||||
prop.put("nav-authors_element_" + i + "_count", entry.count);
|
||||
prop.put("nav-authors_element_" + i + "_modifier", "author:'" + entry.name + "'");
|
||||
prop.put("nav-authors_element_" + i + "_nl", 1);
|
||||
|
|
|
@ -11,7 +11,20 @@
|
|||
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
|
||||
#{/element}#
|
||||
]
|
||||
},#(/nav-domains)##(nav-topics)#::
|
||||
},#(/nav-domains)##(nav-authors)#::
|
||||
{
|
||||
"facetname": "authors",
|
||||
"displayname": "Authors",
|
||||
"type": "String",
|
||||
"min": "0",
|
||||
"max": "0",
|
||||
"mean": "0",
|
||||
"elements": [
|
||||
#{element}#
|
||||
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
|
||||
#{/element}#
|
||||
]
|
||||
}#(/nav-authors)##(nav-topics)#::
|
||||
{
|
||||
"facetname": "topwords",
|
||||
"displayname": "Topics",
|
||||
|
|
|
@ -67,7 +67,7 @@ public class blogBoard {
|
|||
new File(actpath.getParent()).mkdir();
|
||||
new File(newFile.getParent()).mkdir();
|
||||
if (database == null) {
|
||||
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_');
|
||||
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ public class blogBoardComments {
|
|||
new File(actpath.getParent()).mkdir();
|
||||
new File(newFile.getParent()).mkdir();
|
||||
if (database == null) {
|
||||
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_');
|
||||
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -116,17 +116,17 @@ public class bookmarksDB {
|
|||
tagCache=new TreeMap<String, Tag>();
|
||||
bookmarksFile.getParentFile().mkdirs();
|
||||
//this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false));
|
||||
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, bookmarksFileNew), 1000, '_');
|
||||
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, bookmarksFileNew), 1000, '_');
|
||||
|
||||
// tags
|
||||
tagsFile.getParentFile().mkdirs();
|
||||
final boolean tagsFileExisted = tagsFile.exists();
|
||||
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, tagsFileNew), 500, '_');
|
||||
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, tagsFileNew), 500, '_');
|
||||
if (!tagsFileExisted) rebuildTags();
|
||||
|
||||
// dates
|
||||
final boolean datesExisted = datesFile.exists();
|
||||
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, false, false, datesFileNew), 500, '_');
|
||||
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_');
|
||||
if (!datesExisted) rebuildDates();
|
||||
|
||||
// autoReCrawl
|
||||
|
|
|
@ -55,7 +55,7 @@ public class messageBoard {
|
|||
new File(path.getParent()).mkdir();
|
||||
new File(pathNew.getParent()).mkdir();
|
||||
if (database == null) {
|
||||
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, false, false, pathNew), 500, '_');
|
||||
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, pathNew), 500, '_');
|
||||
}
|
||||
sn = 0;
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ public final class userDB {
|
|||
this.userTableFile = userTableFileNew;
|
||||
userTableFile.getParentFile().mkdirs();
|
||||
userTableFileNew.getParentFile().mkdirs();
|
||||
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, false, false, userTableFile), 10, '_');
|
||||
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, userTableFile), 10, '_');
|
||||
}
|
||||
|
||||
void resetDatabase() {
|
||||
|
|
|
@ -57,11 +57,11 @@ public class wikiBoard {
|
|||
final File bkppath, final File bkppathNew) throws IOException {
|
||||
new File(actpath.getParent()).mkdirs();
|
||||
if (datbase == null) {
|
||||
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, actpathNew), 500, '_');
|
||||
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, actpathNew), 500, '_');
|
||||
}
|
||||
new File(bkppath.getParent()).mkdirs();
|
||||
if (bkpbase == null) {
|
||||
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, false, false, bkppathNew), 500, '_');
|
||||
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, bkppathNew), 500, '_');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ public class BLOBTree {
|
|||
* Deprecated class. Please use BLOBHeap instead.
|
||||
*/
|
||||
private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
|
||||
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail) {
|
||||
final int nodesize, final char fillChar, final ByteOrder objectOrder) {
|
||||
// creates or opens a dynamic tree
|
||||
rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder);
|
||||
ObjectIndex fbi;
|
||||
|
@ -78,17 +78,8 @@ public class BLOBTree {
|
|||
fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8);
|
||||
} catch (final IOException e) {
|
||||
e.printStackTrace();
|
||||
if (resetOnFail) {
|
||||
FileUtils.deletedelete(file);
|
||||
try {
|
||||
fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8);
|
||||
} catch (final IOException e1) {
|
||||
e1.printStackTrace();
|
||||
throw new kelondroException(e.getMessage());
|
||||
}
|
||||
} else {
|
||||
throw new kelondroException(e.getMessage());
|
||||
}
|
||||
FileUtils.deletedelete(file);
|
||||
throw new kelondroException(e.getMessage());
|
||||
}
|
||||
this.index = ((useObjectCache) && (!(fbi instanceof EcoTable))) ? (ObjectIndex) new Cache(fbi) : fbi;
|
||||
this.keylen = key;
|
||||
|
@ -100,13 +91,13 @@ public class BLOBTree {
|
|||
}
|
||||
|
||||
public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
|
||||
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException {
|
||||
final int nodesize, final char fillChar, final ByteOrder objectOrder, final File blob) throws IOException {
|
||||
if (blob.exists() || !file.exists()) {
|
||||
// open the blob file and ignore the tree
|
||||
return new BLOBHeap(blob, key, objectOrder, 1024 * 64);
|
||||
}
|
||||
// open a Tree and migrate everything to a Heap
|
||||
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, writebuffer, resetOnFail);
|
||||
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder);
|
||||
BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64);
|
||||
Iterator<byte[]> i = tree.keys(true, false);
|
||||
byte[] k, kk = new byte[key], v;
|
||||
|
|
|
@ -144,7 +144,7 @@ public class plasmaRankingCRProcess {
|
|||
return true;
|
||||
}
|
||||
|
||||
private static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell<CitationReferenceRow> seq) throws IOException {
|
||||
public static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell<CitationReferenceRow> seq) throws IOException {
|
||||
// open file
|
||||
AttrSeq source_cr = null;
|
||||
try {
|
||||
|
|
|
@ -74,6 +74,7 @@ public final class plasmaSearchQuery {
|
|||
public plasmaSearchRankingProfile ranking;
|
||||
public String host; // this is the client host that starts the query, not a site operator
|
||||
public String sitehash; // this is a domain hash, 6 bytes long or null
|
||||
public String authorhash;
|
||||
public yacySeed remotepeer;
|
||||
public Long handle;
|
||||
// values that are set after a search:
|
||||
|
@ -113,6 +114,7 @@ public final class plasmaSearchQuery {
|
|||
this.onlineSnippetFetch = false;
|
||||
this.host = null;
|
||||
this.sitehash = null;
|
||||
this.authorhash = null;
|
||||
this.remotepeer = null;
|
||||
this.handle = Long.valueOf(System.currentTimeMillis());
|
||||
this.specialRights = false;
|
||||
|
@ -132,6 +134,7 @@ public final class plasmaSearchQuery {
|
|||
final int domType, final String domGroupName, final int domMaxTargets,
|
||||
final Bitfield constraint, final boolean allofconstraint,
|
||||
final String site,
|
||||
final String authorhash,
|
||||
final int domainzone,
|
||||
final String host,
|
||||
final boolean specialRights) {
|
||||
|
@ -155,6 +158,7 @@ public final class plasmaSearchQuery {
|
|||
this.constraint = constraint;
|
||||
this.allofconstraint = allofconstraint;
|
||||
this.sitehash = site; assert site == null || site.length() == 6;
|
||||
this.authorhash = authorhash; assert authorhash == null || authorhash.length() > 0;
|
||||
this.onlineSnippetFetch = onlineSnippetFetch;
|
||||
this.host = host;
|
||||
this.remotepeer = null;
|
||||
|
@ -325,6 +329,7 @@ public final class plasmaSearchQuery {
|
|||
"*" + this.prefer +
|
||||
"*" + this.urlMask +
|
||||
"*" + this.sitehash +
|
||||
"*" + this.authorhash +
|
||||
"*" + this.targetlang +
|
||||
"*" + this.constraint +
|
||||
"*" + this.maxDistance;
|
||||
|
|
|
@ -35,13 +35,11 @@ import java.util.HashMap;
|
|||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import de.anomic.htmlFilter.htmlFilterContentScraper;
|
||||
import de.anomic.kelondro.index.BinSearch;
|
||||
import de.anomic.kelondro.order.Base64Order;
|
||||
import de.anomic.kelondro.order.Digest;
|
||||
import de.anomic.kelondro.text.Reference;
|
||||
import de.anomic.kelondro.text.ReferenceContainer;
|
||||
|
@ -78,9 +76,9 @@ public final class plasmaSearchRankingProcess {
|
|||
private final Segment indexSegment;
|
||||
private HashMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
|
||||
private final int[] domZones;
|
||||
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
|
||||
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
|
||||
private final TreeMap<byte[], AuthorInfo> authorNavigator;
|
||||
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
|
||||
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
|
||||
|
||||
public plasmaSearchRankingProcess(
|
||||
final Segment indexSegment,
|
||||
|
@ -107,7 +105,7 @@ public final class plasmaSearchRankingProcess {
|
|||
this.flagcount = new int[32];
|
||||
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
|
||||
this.hostNavigator = new ConcurrentHashMap<String, HostInfo>();
|
||||
this.authorNavigator = new TreeMap<byte[], AuthorInfo>(Base64Order.enhancedCoder);
|
||||
this.authorNavigator = new ConcurrentHashMap<String, AuthorInfo>();
|
||||
this.ref = new ConcurrentHashMap<String, Integer>();
|
||||
this.domZones = new int[8];
|
||||
for (int i = 0; i < 8; i++) {this.domZones[i] = 0;}
|
||||
|
@ -330,16 +328,25 @@ public final class plasmaSearchRankingProcess {
|
|||
// author navigation:
|
||||
String author = metadata.dc_creator();
|
||||
if (author != null && author.length() > 0) {
|
||||
byte[] authorhash = Word.word2hash(author);
|
||||
//synchronized (this.authorNavigator) {
|
||||
AuthorInfo in = this.authorNavigator.get(authorhash);
|
||||
if (in == null) {
|
||||
this.authorNavigator.put(authorhash, new AuthorInfo(author));
|
||||
} else {
|
||||
in.inc();
|
||||
this.authorNavigator.put(authorhash, in);
|
||||
}
|
||||
//}
|
||||
// add author to the author navigator
|
||||
String authorhash = new String(Word.word2hash(author));
|
||||
System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author);
|
||||
|
||||
// if the query is already restricted to one author, skip entries from any other author
|
||||
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// add author to the author navigator
|
||||
AuthorInfo in = this.authorNavigator.get(authorhash);
|
||||
if (in == null) {
|
||||
this.authorNavigator.put(authorhash, new AuthorInfo(author));
|
||||
} else {
|
||||
in.inc();
|
||||
this.authorNavigator.put(authorhash, in);
|
||||
}
|
||||
} else if (this.query.authorhash != null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// get the url
|
||||
|
|
Loading…
Reference in New Issue
Block a user