added news-creation at crawl start

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@460 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2005-07-30 11:57:19 +00:00
parent f555b9d5f2
commit 13abd8b6e7
5 changed files with 43 additions and 5 deletions

View File

@ -96,7 +96,7 @@ public class EditProfile_p {
// generate a news message
//HashMap map = new HashMap();
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("updprfle", profile));
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("prfleupd", profile));
}catch(IOException e){
} finally {
if (fileOut != null) try { fileOut.close(); } catch (Exception e) {}

View File

@ -73,6 +73,7 @@ You can define URLs as start points for Web page crawling and start that crawlin
If checked, the crawl will try to assign the leaf nodes of the search tree to remote peers.
If you need your crawling results locally, you must switch this off.
Only senior and principal peers can initiate or receive remote crawls.
A News message will be created to inform all peers about a global crawl, so they can omit starting a crawl with the same start point.
</td>
</tr>
<tr valign="top" class="TableCellDark">

View File

@ -63,6 +63,7 @@ import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -70,6 +71,7 @@ import de.anomic.server.serverThread;
import de.anomic.tools.bitfield;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyNewsRecord;
public class IndexCreate_p {
@ -144,13 +146,19 @@ public class IndexCreate_p {
switchboard.urlPool.noticeURL.remove(urlhash);
// stack url
String reasonString = switchboard.stackCrawl(crawlingStart, null, yacyCore.seedDB.mySeed.hash, "CRAWLING-ROOT", new Date(), 0,
switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw));
plasmaCrawlProfile.entry pe = switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
String reasonString = switchboard.stackCrawl(crawlingStart, null, yacyCore.seedDB.mySeed.hash, "CRAWLING-ROOT", new Date(), 0, pe);
if (reasonString == null) {
// liftoff!
prop.put("info", 2);//start msg
prop.put("info_crawlingURL", ((String) post.get("crawlingURL")));
// generate a YaCyNews if the global flag was set
if (crawlOrder) {
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("crwlstrt", pe.map()));
}
} else {
prop.put("error", 5); //Crawling failed
prop.put("error_crawlingURL", ((String) post.get("crawlingURL")));

View File

@ -435,8 +435,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ((profiles.size() == 1) ||
(getConfig("defaultRemoteProfile", "").length() == 0) ||
(profiles.getEntry(getConfig("defaultRemoteProfile", "")) == null)) {
// generate new default entry for proxy crawling
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, false, false, true, true, false, true, true, false);
// generate new default entry for remote crawling
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
setConfig("defaultRemoteProfile", defaultRemoteProfile.handle());
} else {
defaultRemoteProfile = profiles.getEntry(getConfig("defaultRemoteProfile", ""));

View File

@ -51,10 +51,39 @@ public class yacyNewsPool {
public static final int OUTGOING_DB = 2;
public static final int PUBLISHED_DB = 3;
public static final String[] category = {
"prfleupd", // a profile entry was updated (implemented)
"crwlstrt", // a crawl with remote indexing was started
"crwlstop", // a crawl with remote indexing was stopped
"crwlcomm", // a comment on a crawl with remote indexing
"blckladd", // a public blacklist entry was added
"blcklavt", // a vote and comment on a public blacklist add
"blckldel", // a public blacklist entry was deleted
"blckldvt", // a vote and comment on a public blacklist delete
"flshradd", // a file was added to the file share
"flshrdel", // a file was deleted from the file share
"flshrcom", // a comment to a file share entry
"brdcstin", // a broadcast news in rss format
"brdcstup", // an update to a broadcast
"brdcstvt", // a vote on a broadcast
"brdcstco", // a comment on a broadcast
"bkmrkadd", // a bookmark was added/created
"bkmrkavt", // a vote and comment on a bookmark add
"bkmrkmov", // a bookmark was moved
"bkmrkmvt", // a vote and comment on a bookmark move
"bkmrkdel", // a bookmark was deleted
"bkmrkdvt", // a vote and comment on a bookmark delete
"wiki_add", // a wiki page was created
"wiki_upd", // a wiki page was updated
"wiki_del" // a wiki page was deleted
// urlvotes
};
private yacyNewsDB newsDB;
private yacyNewsQueue outgoingNews, publishedNews, incomingNews, processedNews;
private int maxDistribution;
public yacyNewsPool(File yacyDBPath, int bufferkb) throws IOException {
newsDB = new yacyNewsDB(new File(yacyDBPath, "news0.db"), bufferkb);
outgoingNews = new yacyNewsQueue(new File(yacyDBPath, "newsOut0.stack"), newsDB);