mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- added automatic tag generation when a web page from the search results is added
- added new image 'B' in front of search results for bookmark generation
- added news generation when a public bookmark is added
- the '+' in front of search results has a new meaning: positive rating for that result
- added news generation when a '+' is hit

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2613 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
8a30c5343d
commit
3aac5b26da
|
@ -158,13 +158,10 @@ public class Blog {
|
|||
|
||||
// create a news message
|
||||
HashMap map = new HashMap();
|
||||
map.put("subject", StrSubject);
|
||||
map.put("page", pagename);
|
||||
map.put("author", StrAuthor);
|
||||
map.put("ip", ip);
|
||||
try {
|
||||
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("blog_add", map));
|
||||
} catch (IOException e) {}
|
||||
map.put("subject", StrSubject.replace(',', ' '));
|
||||
map.put("author", StrAuthor.replace(',', ' '));
|
||||
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("blog_add", map));
|
||||
}
|
||||
|
||||
page = switchboard.blogDB.read(pagename); //maybe "if(page == null)"
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
|
||||
import java.io.File;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
@ -56,9 +57,12 @@ import de.anomic.data.listManager;
|
|||
import de.anomic.data.bookmarksDB.Tag;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.plasma.plasmaCrawlLURL;
|
||||
import de.anomic.plasma.plasmaParserDocument;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyNewsRecord;
|
||||
|
||||
public class Bookmarks {
|
||||
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||
|
@ -110,13 +114,22 @@ public class Bookmarks {
|
|||
bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_TITLE, title);
|
||||
bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_DESCRIPTION, description);
|
||||
if(((String) post.get("public")).equals("public")){
|
||||
bookmark.setPublic(true);
|
||||
bookmark.setPublic(true);
|
||||
|
||||
// create a news message
|
||||
HashMap map = new HashMap();
|
||||
map.put("url", url.replace(',', '|'));
|
||||
map.put("title", title.replace(',', ' '));
|
||||
map.put("description", description.replace(',', ' '));
|
||||
map.put("tags", tagsString.replace(',', ' '));
|
||||
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("bkmrkadd", map));
|
||||
}else{
|
||||
bookmark.setPublic(false);
|
||||
bookmark.setPublic(false);
|
||||
}
|
||||
bookmark.setTags(tags, true);
|
||||
switchboard.bookmarksDB.saveBookmark(bookmark);
|
||||
|
||||
|
||||
}else{
|
||||
//ERROR
|
||||
}
|
||||
|
@ -135,12 +148,13 @@ public class Bookmarks {
|
|||
if (bookmark == null) {
|
||||
// try to get the bookmark from the LURL database
|
||||
plasmaCrawlLURL.Entry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null);
|
||||
plasmaParserDocument document = switchboard.snippetCache.retrieveDocument(urlentry.url(), true);
|
||||
if (urlentry != null) {
|
||||
prop.put("mode_edit", 0); // create mode
|
||||
prop.put("mode_title", urlentry.descr());
|
||||
prop.put("mode_description", urlentry.descr());
|
||||
prop.put("mode_description", (document == null) ? urlentry.descr() : document.getMainLongTitle());
|
||||
prop.put("mode_url", urlentry.url());
|
||||
prop.put("mode_tags", "");
|
||||
prop.put("mode_tags", (document == null) ? "" : document.getKeywords(','));
|
||||
prop.put("mode_public", 0);
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -44,10 +44,7 @@
|
|||
// if the shell's current path is HTROOT
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
import java.io.IOException;
|
||||
|
||||
import de.anomic.data.wikiCode;
|
||||
|
@ -62,12 +59,6 @@ import de.anomic.yacy.yacySeed;
|
|||
|
||||
public class IndexCreateIndexingQueue_p {
|
||||
|
||||
private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
|
||||
private static String daydate(Date date) {
|
||||
if (date == null) return "";
|
||||
return dayFormatter.format(date);
|
||||
}
|
||||
|
||||
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
|
||||
|
|
|
@ -104,11 +104,8 @@ public class Wiki {
|
|||
// create a news message
|
||||
HashMap map = new HashMap();
|
||||
map.put("page", pagename);
|
||||
map.put("author", author);
|
||||
map.put("ip", ip);
|
||||
try {
|
||||
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("wiki_upd", map));
|
||||
} catch (IOException e) {}
|
||||
map.put("author", author.replace(',', ' '));
|
||||
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("wiki_upd", map));
|
||||
}
|
||||
|
||||
wikiBoard.entry page = switchboard.wikiDB.read(pagename);
|
||||
|
|
12
htroot/env/base.css
vendored
12
htroot/env/base.css
vendored
|
@ -198,19 +198,23 @@ div.urlactions a {
|
|||
display:block;
|
||||
}
|
||||
|
||||
img.bookmarkIcon, img.deleteIcon {
|
||||
img.bookmarkIcon, img.deleteIcon, img.recommendIcon {
|
||||
height: 16px;
|
||||
width: 16px;
|
||||
}
|
||||
|
||||
a.deletelink:hover, div.searchresults:hover a.deletelink, div.searchresults.hover a.deletelink {
|
||||
background:url(/env/grafics/minus.gif) center center no-repeat;
|
||||
a.bookmarklink:hover, div.searchresults:hover a.bookmarklink, div.searchresults.hover a.bookmarklink {
|
||||
background:url(/env/grafics/bookmark.gif) center center no-repeat;
|
||||
}
|
||||
|
||||
a.bookmarklink:hover, div.searchresults:hover a.bookmarklink, div.searchresults.hover a.bookmarklink {
|
||||
a.recommendlink:hover, div.searchresults:hover a.recommendlink, div.searchresults.hover a.recommendlink {
|
||||
background:url(/env/grafics/plus.gif) center center no-repeat;
|
||||
}
|
||||
|
||||
a.deletelink:hover, div.searchresults:hover a.deletelink, div.searchresults.hover a.deletelink {
|
||||
background:url(/env/grafics/minus.gif) center center no-repeat;
|
||||
}
|
||||
|
||||
div.searchresults p, div.searchresults h4 {
|
||||
margin:2px 2px 2px 22px;
|
||||
}
|
||||
|
|
BIN
htroot/env/grafics/bookmark.gif
vendored
Normal file
BIN
htroot/env/grafics/bookmark.gif
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 131 B |
2
htroot/env/xhtml-style.css
vendored
2
htroot/env/xhtml-style.css
vendored
|
@ -196,7 +196,7 @@ font-style:normal;
|
|||
color: #999999;
|
||||
text-decoration:none;
|
||||
}
|
||||
img.bookmarkIcon, img.deleteIcon {
|
||||
img.bookmarkIcon, img.deleteIcon, img.recommendIcon {
|
||||
height: 16px;
|
||||
width: 16px;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ public class snippet {
|
|||
// do the search
|
||||
Set queryHashes = plasmaSearchQuery.words2hashes(query);
|
||||
|
||||
plasmaSnippetCache.result snippet = switchboard.snippetCache.retrieve(url, queryHashes, true, 260);
|
||||
plasmaSnippetCache.Snippet snippet = switchboard.snippetCache.retrieveSnippet(url, queryHashes, true, 260);
|
||||
prop.put("status",snippet.getSource());
|
||||
if (snippet.getSource() < 11) {
|
||||
//prop.put("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
|
||||
|
|
|
@ -83,7 +83,7 @@ public final class search {
|
|||
// final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability
|
||||
final String key = post.get("key", ""); // transmission key for response
|
||||
final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined
|
||||
final String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned
|
||||
String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned
|
||||
// final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
|
||||
// final String fwden = post.get("fwden", ""); // forward deny, a list of seed hashes. They may NOT be target of forward hopping
|
||||
final long duetime= post.getLong("duetime", 3000);
|
||||
|
@ -94,6 +94,7 @@ public final class search {
|
|||
// final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers
|
||||
// Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
|
||||
|
||||
//urls = "nQoUx975gJ5C"; // ONLY FOR TESTS!
|
||||
|
||||
// tell all threads to do nothing for a specific time
|
||||
sb.intermissionAllThreads(2 * duetime);
|
||||
|
@ -196,10 +197,10 @@ public final class search {
|
|||
String resource = "";
|
||||
//plasmaIndexEntry pie;
|
||||
plasmaCrawlLURL.Entry urlentry;
|
||||
plasmaSnippetCache.result snippet;
|
||||
plasmaSnippetCache.Snippet snippet;
|
||||
while ((acc.hasMoreElements()) && (i < squery.wantedResults)) {
|
||||
urlentry = acc.nextElement();
|
||||
snippet = sb.snippetCache.retrieve(urlentry.url(), squery.queryHashes, false, 260);
|
||||
snippet = sb.snippetCache.retrieveSnippet(urlentry.url(), squery.queryHashes, false, 260);
|
||||
if (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH) {
|
||||
// suppress line: there is no match in that resource
|
||||
} else {
|
||||
|
|
|
@ -87,7 +87,13 @@ available for everyone. Then stay online to support crawls from other peers. Tha
|
|||
<div class="searchresults">
|
||||
<div class="urlactions">
|
||||
<a href="/Bookmarks.html?edit=#[urlhash]#" class="bookmarklink" title="bookmark"><img src="/env/grafics/empty.gif" title="bookmark" alt="bookmark" class="bookmarkIcon" /></a>
|
||||
<a href="#[delete]#" title="delete" class="deletelink" ><img src="/env/grafics/empty.gif" title="delete" alt="delete" class="deleteIcon" /></a>
|
||||
#(recommend)#
|
||||
<img src="/env/grafics/empty.gif" title="" alt="" class="recommendIcon" />
|
||||
<img src="/env/grafics/empty.gif" title="" alt="" class="deleteIcon" />
|
||||
::
|
||||
<a href="#[recommendlink]#" class="recommendlink" title="recommend"><img src="/env/grafics/empty.gif" title="recommend" alt="recommend" class="recommendIcon" /></a>
|
||||
<a href="#[deletelink]#" title="delete" class="deletelink" ><img src="/env/grafics/empty.gif" title="delete" alt="delete" class="deleteIcon" /></a>
|
||||
#(/recommend)#
|
||||
</div>
|
||||
<h4 class="linktitle"><a href="#[url]#">#[description]#</a></h4>
|
||||
<p class="snippet"><span class="#(snippet)#snippetLoading::snippetLoaded#(/snippet)#" id="#[urlhash]#">#(snippet)#loading snippet ...::#[text]##(/snippet)#</span></p>
|
||||
|
|
|
@ -57,6 +57,8 @@ import de.anomic.htmlFilter.htmlFilterImageEntry;
|
|||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.kelondro.kelondroMSetTools;
|
||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||
import de.anomic.plasma.plasmaCrawlLURL;
|
||||
import de.anomic.plasma.plasmaParserDocument;
|
||||
import de.anomic.plasma.plasmaSearchImages;
|
||||
import de.anomic.plasma.plasmaSearchRankingProfile;
|
||||
import de.anomic.plasma.plasmaSearchTimingProfile;
|
||||
|
@ -68,6 +70,7 @@ import de.anomic.server.serverDate;
|
|||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyNewsRecord;
|
||||
|
||||
public class yacysearch {
|
||||
|
||||
|
@ -166,12 +169,31 @@ public class yacysearch {
|
|||
prop.put("AUTHENTICATE", "admin log-in"); // force log-in
|
||||
return prop;
|
||||
}
|
||||
final String delHash = post.get("deleteref", "");
|
||||
final String delHash = post.get("deleteref", ""); // urlhash
|
||||
sb.removeReferences(delHash, query);
|
||||
}
|
||||
|
||||
// if a plus-button was hit, create a new voting message
|
||||
if (post.containsKey("recommendref")) {
|
||||
if (!sb.verifyAuthentication(header, true)) {
|
||||
prop.put("AUTHENTICATE", "admin log-in"); // force log-in
|
||||
return prop;
|
||||
}
|
||||
final String recommendHash = post.get("recommendref", ""); // urlhash
|
||||
plasmaCrawlLURL.Entry urlentry = sb.urlPool.loadedURL.load(recommendHash, null);
|
||||
if (urlentry != null) {
|
||||
plasmaParserDocument document = sb.snippetCache.retrieveDocument(urlentry.url(), true);
|
||||
// create a news message
|
||||
HashMap map = new HashMap();
|
||||
map.put("url", urlentry.url().toNormalform().replace(',', '|'));
|
||||
map.put("title", urlentry.descr().replace(',', ' '));
|
||||
map.put("description", ((document == null) ? urlentry.descr() : document.getMainLongTitle()).replace(',', ' '));
|
||||
map.put("tags", ((document == null) ? "" : document.getKeywords(' ')));
|
||||
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map));
|
||||
}
|
||||
}
|
||||
|
||||
// prepare search order
|
||||
|
||||
final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getAddress() != null));
|
||||
|
||||
String order1 = plasmaSearchRankingProfile.ORDER_DATE;
|
||||
|
|
|
@ -165,7 +165,7 @@ font-style:normal;
|
|||
color: #999999;
|
||||
text-decoration:none;
|
||||
}
|
||||
img.bookmarkIcon{
|
||||
img.bookmarkIcon, img.deleteIcon, img.recommendIcon{
|
||||
height: 16px;
|
||||
width: 16px;
|
||||
}
|
||||
|
|
|
@ -165,7 +165,7 @@ font-style:normal;
|
|||
color: #999999;
|
||||
text-decoration:none;
|
||||
}
|
||||
img.bookmarkIcon{
|
||||
img.bookmarkIcon, img.deleteIcon, img.recommendIcon {
|
||||
height: 16px;
|
||||
width: 16px;
|
||||
}
|
||||
|
|
|
@ -310,7 +310,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
|
|||
if (s.length() == 0) {
|
||||
return getTitle().toLowerCase().split(splitrex);
|
||||
} else {
|
||||
return s.split(" |,");
|
||||
return s.split(" |,");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -97,8 +97,8 @@ public class odtParser extends AbstractParser implements Parser {
|
|||
byte[] docContent = null;
|
||||
String docDescription = null;
|
||||
String docKeywords = null;
|
||||
String docShortTitle = null;
|
||||
String docLongTitle = null;
|
||||
String docShortTitle = null;
|
||||
String docLongTitle = null;
|
||||
|
||||
// opening the file as zip file
|
||||
ZipFile zipFile= new ZipFile(dest);
|
||||
|
@ -153,7 +153,7 @@ public class odtParser extends AbstractParser implements Parser {
|
|||
location,
|
||||
mimeType,
|
||||
"UTF-8",
|
||||
docKeywords,
|
||||
docKeywords.split(" |,"),
|
||||
docShortTitle,
|
||||
docLongTitle,
|
||||
null,
|
||||
|
|
|
@ -153,7 +153,7 @@ public class pdfParser extends AbstractParser implements Parser {
|
|||
location,
|
||||
mimeType,
|
||||
"UTF-8",
|
||||
docKeyWords,
|
||||
docKeyWords.split(" |,"),
|
||||
docSubject,
|
||||
docTitle,
|
||||
null,
|
||||
|
|
|
@ -160,8 +160,8 @@ public class tarParser extends AbstractParser implements Parser {
|
|||
if (theDoc == null) continue;
|
||||
|
||||
// merging all documents together
|
||||
if (docKeywords.length() > 0) docKeywords.append("\n");
|
||||
docKeywords.append(theDoc.getKeywords());
|
||||
if (docKeywords.length() > 0) docKeywords.append(",");
|
||||
docKeywords.append(theDoc.getKeywords(','));
|
||||
|
||||
if (docLongTitle.length() > 0) docLongTitle.append("\n");
|
||||
docLongTitle.append(theDoc.getMainLongTitle());
|
||||
|
@ -190,7 +190,7 @@ public class tarParser extends AbstractParser implements Parser {
|
|||
location,
|
||||
mimeType,
|
||||
null,
|
||||
docKeywords.toString(),
|
||||
docKeywords.toString().split(" |,"),
|
||||
docShortTitle.toString(),
|
||||
docLongTitle.toString(),
|
||||
(String[])docSections.toArray(new String[docSections.size()]),
|
||||
|
|
|
@ -136,8 +136,8 @@ public class zipParser extends AbstractParser implements Parser {
|
|||
if (theDoc == null) continue;
|
||||
|
||||
// merging all documents together
|
||||
if (docKeywords.length() > 0) docKeywords.append("\n");
|
||||
docKeywords.append(theDoc.getKeywords());
|
||||
if (docKeywords.length() > 0) docKeywords.append(",");
|
||||
docKeywords.append(theDoc.getKeywords(','));
|
||||
|
||||
if (docLongTitle.length() > 0) docLongTitle.append("\n");
|
||||
docLongTitle.append(theDoc.getMainLongTitle());
|
||||
|
@ -166,7 +166,7 @@ public class zipParser extends AbstractParser implements Parser {
|
|||
location,
|
||||
mimeType,
|
||||
null,
|
||||
docKeywords.toString(),
|
||||
docKeywords.toString().split(" |,"),
|
||||
docShortTitle.toString(),
|
||||
docLongTitle.toString(),
|
||||
(String[])docSections.toArray(new String[docSections.size()]),
|
||||
|
|
|
@ -53,7 +53,6 @@ import java.io.IOException;
|
|||
import java.io.OutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
|
||||
import de.anomic.net.URL;
|
||||
import java.util.Arrays;
|
||||
|
@ -588,8 +587,9 @@ public final class plasmaParser {
|
|||
int p = 0;
|
||||
for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j];
|
||||
plasmaParserDocument ppd = new plasmaParserDocument(new URL(location.toNormalform()),
|
||||
mimeType, scraper.getCharset(), null, null, scraper.getTitle(),
|
||||
sections, null,
|
||||
mimeType, scraper.getCharset(), scraper.getKeywords(),
|
||||
scraper.getTitle(), scraper.getTitle(),
|
||||
sections, scraper.getDescription(),
|
||||
scraper.getText(), scraper.getAnchors(), scraper.getImages());
|
||||
//scraper.close();
|
||||
return ppd;
|
||||
|
|
|
@ -57,7 +57,7 @@ public class plasmaParserDocument {
|
|||
URL location; // the source url
|
||||
String mimeType; // mimeType as taken from http header
|
||||
String charset; // the charset of the document
|
||||
String keywords; // most resources provide a keyword field
|
||||
String[] keywords; // most resources provide a keyword field
|
||||
String shortTitle; // a shortTitle mostly appears in the window header (border)
|
||||
String longTitle; // the real title of the document, commonly h1-tags
|
||||
String[] sections; // if present: more titles/headlines appearing in the document
|
||||
|
@ -75,13 +75,13 @@ public class plasmaParserDocument {
|
|||
boolean resorted;
|
||||
|
||||
public plasmaParserDocument(URL location, String mimeType, String charset,
|
||||
String keywords, String shortTitle, String longTitle,
|
||||
String[] keywords, String shortTitle, String longTitle,
|
||||
String[] sections, String abstrct,
|
||||
byte[] text, Map anchors, TreeSet images) {
|
||||
this.location = location;
|
||||
this.mimeType = (mimeType==null)?"application/octet-stream":mimeType;
|
||||
this.charset = charset;
|
||||
this.keywords = (keywords==null)?"":keywords;
|
||||
this.keywords = (keywords==null) ? new String[0] : keywords;
|
||||
this.shortTitle = (shortTitle==null)?"":shortTitle;
|
||||
this.longTitle = (longTitle==null)?"":longTitle;
|
||||
this.sections = (sections==null)?new String[0]:sections;
|
||||
|
@ -137,8 +137,21 @@ public class plasmaParserDocument {
|
|||
return getCondenser().sentences();
|
||||
}
|
||||
|
||||
public String getKeywords() {
|
||||
return this.keywords;
|
||||
/**
 * Builds a normalized keyword list as a single string.
 *
 * Each entry of the keywords array is trimmed and lower-cased; null and
 * empty entries are skipped. Collecting the entries in a TreeSet removes
 * duplicates and yields them in the set's natural (sorted) order.
 *
 * @param separator character placed between two consecutive keywords
 * @return the keywords joined by separator, or "" if no usable keyword exists
 */
public String getKeywords(char separator) {
|
||||
// sort out doubles and empty words
|
||||
TreeSet hs = new TreeSet();
|
||||
String s;
|
||||
for (int i = 0; i < this.keywords.length; i++) {
|
||||
if (this.keywords[i] == null) continue;
|
||||
s = this.keywords[i].trim();
|
||||
if (s.length() > 0) hs.add(s.toLowerCase());
|
||||
}
|
||||
if (hs.size() == 0) return ""; // nothing left after filtering
|
||||
// generate a new list
|
||||
StringBuffer sb = new StringBuffer(this.keywords.length * 6); // initial capacity only; the buffer grows as needed
|
||||
Iterator i = hs.iterator();
|
||||
while (i.hasNext()) sb.append((String) i.next()).append(separator); // every keyword is followed by a separator
|
||||
return sb.substring(0, sb.length() - 1); // cut off the trailing separator
|
||||
}
|
||||
|
||||
public Map getAnchors() {
|
||||
|
|
|
@ -185,7 +185,7 @@ public final class plasmaSearchPreOrder {
|
|||
public Object[] /*{indexEntry, Long}*/ next() {
|
||||
String top = (String) pageAcc.firstKey();
|
||||
//System.out.println("preorder-key: " + top);
|
||||
Long preranking = new Long(Long.MAX_VALUE - Long.parseLong(top.substring(0, 16), 16));
|
||||
Long preranking = new Long(Long.MAX_VALUE - Long.parseLong(top.substring(0, 16), 16)); // java.lang.NumberFormatException: For input string: "8000000000020b17" ???
|
||||
return new Object[]{(indexEntry) pageAcc.remove(top), preranking};
|
||||
}
|
||||
|
||||
|
|
|
@ -96,11 +96,11 @@ public class plasmaSnippetCache {
|
|||
this.snippetsCache = new HashMap();
|
||||
}
|
||||
|
||||
public class result {
|
||||
public class Snippet {
|
||||
private String line;
|
||||
private String error;
|
||||
private int source;
|
||||
public result(String line, int source, String errortext) {
|
||||
public Snippet(String line, int source, String errortext) {
|
||||
this.line = line;
|
||||
this.source = source;
|
||||
this.error = errortext;
|
||||
|
@ -147,11 +147,11 @@ public class plasmaSnippetCache {
|
|||
return retrieveFromCache(hashes, indexURL.urlHash(url)) != null;
|
||||
}
|
||||
|
||||
public result retrieve(URL url, Set queryhashes, boolean fetchOnline, int snippetMaxLength) {
|
||||
public Snippet retrieveSnippet(URL url, Set queryhashes, boolean fetchOnline, int snippetMaxLength) {
|
||||
// heise = "0OQUNU3JSs05"
|
||||
if (queryhashes.size() == 0) {
|
||||
//System.out.println("found no queryhashes for URL retrieve " + url);
|
||||
return new result(null, ERROR_NO_HASH_GIVEN, "no query hashes given");
|
||||
return new Snippet(null, ERROR_NO_HASH_GIVEN, "no query hashes given");
|
||||
}
|
||||
String urlhash = indexURL.urlHash(url);
|
||||
|
||||
|
@ -161,7 +161,7 @@ public class plasmaSnippetCache {
|
|||
String line = retrieveFromCache(wordhashes, urlhash);
|
||||
if (line != null) {
|
||||
//System.out.println("found snippet for URL " + url + " in cache: " + line);
|
||||
return new result(line, source, null);
|
||||
return new Snippet(line, source, null);
|
||||
}
|
||||
|
||||
// if the snippet is not in the cache, we can try to get it from the htcache
|
||||
|
@ -178,32 +178,51 @@ public class plasmaSnippetCache {
|
|||
source = SOURCE_WEB;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
return new result(null, ERROR_SOURCE_LOADING, "error loading resource from web: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
return new Snippet(null, ERROR_SOURCE_LOADING, "error loading resource from web: " + e.getMessage());
|
||||
}
|
||||
if (resource == null) {
|
||||
//System.out.println("cannot load document for URL " + url);
|
||||
return new result(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL");
|
||||
return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL");
|
||||
}
|
||||
plasmaParserDocument document = parseDocument(url, resource, docInfo);
|
||||
|
||||
if (document == null) return new result(null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
|
||||
if (document == null) return new Snippet(null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
|
||||
//System.out.println("loaded document for URL " + url);
|
||||
String[] sentences = document.getSentences();
|
||||
//System.out.println("----" + url.toString()); for (int l = 0; l < sentences.length; l++) System.out.println(sentences[l]);
|
||||
if ((sentences == null) || (sentences.length == 0)) {
|
||||
//System.out.println("found no sentences in url " + url);
|
||||
return new result(null, ERROR_PARSER_NO_LINES, "parser returned no sentences");
|
||||
return new Snippet(null, ERROR_PARSER_NO_LINES, "parser returned no sentences");
|
||||
}
|
||||
|
||||
// we have found a parseable non-empty file: use the lines
|
||||
line = computeSnippet(sentences, queryhashes, 8 + 6 * queryhashes.size(), snippetMaxLength);
|
||||
//System.out.println("loaded snippet for URL " + url + ": " + line);
|
||||
if (line == null) return new result(null, ERROR_NO_MATCH, "no matching snippet found");
|
||||
if (line == null) return new Snippet(null, ERROR_NO_MATCH, "no matching snippet found");
|
||||
if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength);
|
||||
|
||||
// finally store this snippet in our own cache
|
||||
storeToCache(wordhashes, urlhash, line);
|
||||
return new result(line, source, null);
|
||||
return new Snippet(line, source, null);
|
||||
}
|
||||
|
||||
/**
 * Loads the content of a URL and returns it as a parsed document.
 *
 * The content is first requested from the cache manager. If nothing is
 * found there and fetchOnline is set, the resource is loaded from the web
 * and the cache manager is asked again.
 *
 * @param url         location of the resource
 * @param fetchOnline if true, a web load is attempted when the cache manager has no content
 * @return the result of parseDocument, or null if an IOException occurred
 *         or no content could be obtained
 */
public plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline) {
|
||||
byte[] resource = null;
|
||||
IResourceInfo docInfo = null;
|
||||
try {
|
||||
resource = this.cacheManager.loadResourceContent(url);
|
||||
if ((fetchOnline) && (resource == null)) {
|
||||
plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000); // NOTE(review): 5000 is presumably a timeout in milliseconds - confirm
|
||||
if (entry != null) docInfo = entry.getDocumentInfo();
|
||||
resource = this.cacheManager.loadResourceContent(url); // second attempt after the web load
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
if (resource == null) return null; // still no content available
|
||||
return parseDocument(url, resource, docInfo); // docInfo stays null unless the web load returned an entry
|
||||
}
|
||||
|
||||
public void storeToCache(String wordhashes, String urlhash, String snippet) {
|
||||
|
@ -460,7 +479,7 @@ public class plasmaSnippetCache {
|
|||
}
|
||||
public void run() {
|
||||
log.logFine("snippetFetcher: try to get URL " + url);
|
||||
plasmaSnippetCache.result snippet = retrieve(url, queryhashes, true, 260);
|
||||
plasmaSnippetCache.Snippet snippet = retrieveSnippet(url, queryhashes, true, 260);
|
||||
if (snippet.line == null)
|
||||
log.logFine("snippetFetcher: cannot get URL " + url + ". error(" + snippet.source + "): " + snippet.error);
|
||||
else
|
||||
|
|
|
@ -146,7 +146,6 @@ import de.anomic.kelondro.kelondroMapTable;
|
|||
import de.anomic.plasma.dbImport.dbImportManager;
|
||||
import de.anomic.plasma.urlPattern.plasmaURLPattern;
|
||||
import de.anomic.server.serverAbstractSwitch;
|
||||
import de.anomic.server.serverByteBuffer;
|
||||
import de.anomic.server.serverCodings;
|
||||
import de.anomic.server.serverDate;
|
||||
import de.anomic.server.serverInstantThread;
|
||||
|
@ -2033,7 +2032,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
String urlstring, urlname, filename, urlhash;
|
||||
String host, hash, address, descr = "";
|
||||
yacySeed seed;
|
||||
plasmaSnippetCache.result snippet;
|
||||
plasmaSnippetCache.Snippet snippet;
|
||||
String formerSearch = query.words(" ");
|
||||
long targetTime = timestamp + query.maximumTime;
|
||||
if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 5000;
|
||||
|
@ -2077,11 +2076,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
//addScoreForked(ref, gs, urlstring.split("/"));
|
||||
URL wordURL;
|
||||
if (urlstring.matches(query.urlMask)) { //.* is default
|
||||
snippet = snippetCache.retrieve(url, query.queryHashes, false, 260);
|
||||
snippet = snippetCache.retrieveSnippet(url, query.queryHashes, false, 260);
|
||||
if (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH) {
|
||||
// suppress line: there is no match in that resource
|
||||
} else {
|
||||
prop.put("type_results_" + i + "_delete", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", urlstring) == null) ? 1 : 0);
|
||||
prop.put("type_results_" + i + "_recommend_deletelink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_recommend_recommendlink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_description", descr);
|
||||
prop.put("type_results_" + i + "_url", urlstring);
|
||||
prop.put("type_results_" + i + "_urlhash", urlhash);
|
||||
|
|
|
@ -463,11 +463,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
}
|
||||
|
||||
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
|
||||
if (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete)) return true;
|
||||
if (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete)) return true;
|
||||
if (useCollectionIndex) {if (collections.removeEntry(wordHash, urlHash, deleteComplete)) return true;}
|
||||
if (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete)) return true;
|
||||
return backend.removeEntry(wordHash, urlHash, deleteComplete);
|
||||
boolean removed = false;
|
||||
removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
|
||||
removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
|
||||
if (useCollectionIndex) {removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));}
|
||||
removed = removed | (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete));
|
||||
removed = removed | backend.removeEntry(wordHash, urlHash, deleteComplete);
|
||||
return removed;
|
||||
}
|
||||
|
||||
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
|
||||
|
|
|
@ -303,7 +303,6 @@ public final class serverCharBuffer extends Writer {
|
|||
int l = 0;
|
||||
while ((l < length) && (buffer[offset + l] <= ' ')) l++;
|
||||
int r = length;
|
||||
int u;
|
||||
while ((r > 0) && (buffer[offset + r - 1] <= ' ')) r--;
|
||||
if (l > r) r = l;
|
||||
return trim(l, r);
|
||||
|
|
|
@ -67,20 +67,18 @@ public class yacyNewsPool {
|
|||
"flshradd", // a file was added to the file share
|
||||
"flshrdel", // a file was deleted from the file share
|
||||
"flshrcom", // a comment to a file share entry
|
||||
"brdcstin", // a broadcast news in rss format
|
||||
"brdcstup", // an update to a broadcast
|
||||
"brdcstvt", // a vote on a broadcast
|
||||
"brdcstco", // a comment on a broadcast
|
||||
"bkmrkadd", // a bookmark was added/created
|
||||
"bkmrkavt", // a vote and comment on a bookmark add
|
||||
"bkmrkmov", // a bookmark was moved
|
||||
"bkmrkmvt", // a vote and comment on a bookmark move
|
||||
"bkmrkdel", // a bookmark was deleted
|
||||
"bkmrkdvt", // a vote and comment on a bookmark delete
|
||||
"wiki_add", // a wiki page was created
|
||||
"stippadd", // a surf tipp was added
|
||||
"stippavt", // a vote and comment on a surf tipp
|
||||
"wiki_upd", // a wiki page was updated
|
||||
"wiki_del", // a wiki page was deleted
|
||||
"blog_add" // a blog entry was added
|
||||
"blog_add", // a blog entry was added
|
||||
"blog_del" // a blog page was deleted
|
||||
};
|
||||
public static HashSet categories;
|
||||
static {
|
||||
|
@ -122,12 +120,14 @@ public class yacyNewsPool {
|
|||
return newsDB.cacheObjectStatus();
|
||||
}
|
||||
|
||||
public void publishMyNews(yacyNewsRecord record) throws IOException {
|
||||
public void publishMyNews(yacyNewsRecord record) {
|
||||
// this shall be called if our peer generated a new news record and wants to publish it
|
||||
if (newsDB.get(record.id()) == null) {
|
||||
incomingNews.push(record); // we want to see our own news..
|
||||
outgoingNews.push(record); // .. and put it on the publishing list
|
||||
}
|
||||
try {
|
||||
if (newsDB.get(record.id()) == null) {
|
||||
incomingNews.push(record); // we want to see our own news..
|
||||
outgoingNews.push(record); // .. and put it on the publishing list
|
||||
}
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
|
||||
public yacyNewsRecord myPublication() throws IOException {
|
||||
|
@ -183,23 +183,28 @@ public class yacyNewsPool {
|
|||
return pc;
|
||||
}
|
||||
|
||||
long day = 1000 * 60 * 60 * 24;
|
||||
private boolean automaticProcessP(yacyNewsRecord record) {
|
||||
if (record == null) return false;
|
||||
if (record.category() == null) return true;
|
||||
if ((System.currentTimeMillis() - record.created().getTime()) > (1000 * 60 * 60 * 24 * 7) /* 1 Week */) {
|
||||
if ((System.currentTimeMillis() - record.created().getTime()) > (14 * day)) {
|
||||
// remove everything after 2 weeks
|
||||
return true;
|
||||
}
|
||||
if (((record.category().equals("wiki_add")) || (record.category().equals("wiki_upd"))) &&
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (1000 * 60 * 60 * 24 * 3) /* 3 Days */)) {
|
||||
return true;
|
||||
}
|
||||
if ((record.category().equals("wiki_upd")) &&
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (3 * day))) {
|
||||
return true;
|
||||
}
|
||||
if ((record.category().equals("blog_add")) &&
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (1000 * 60 * 60 * 24 * 3) /* 3 Days */)) {
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (3 * day))) {
|
||||
return true;
|
||||
}
|
||||
if ((record.category().equals("prfleupd")) &&
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (7 * day))) {
|
||||
return true;
|
||||
}
|
||||
if ((record.category().equals("crwlstrt")) &&
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (1000 * 60 * 60 * 24 * 2) /* 2 Days */)) {
|
||||
((System.currentTimeMillis() - record.created().getTime()) > (2 * day))) {
|
||||
yacySeed seed = yacyCore.seedDB.get(record.originator());
|
||||
if (seed == null) return false;
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue
Block a user