refactoring

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2007-08-06 00:56:56 +00:00
parent 5605887571
commit 1af0e3bd84
3 changed files with 70 additions and 72 deletions

View File

@ -61,6 +61,7 @@ import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchPostOrder;
@ -187,14 +188,14 @@ public final class search {
// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile);
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults);
plasmaSearchProcessing remoteTiming = null;
plasmaSearchProcessing localProcess = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults);
plasmaSearchProcessing remoteProcess = null;
theSearch = new plasmaSearchEvent(squery,
rankingProfile, localTiming, remoteTiming, true,
rankingProfile, localProcess, remoteProcess, true,
yacyCore.log, sb.wordIndex,
sb.snippetCache, null);
Map[] containers = localTiming.localSearchContainers(squery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
Map[] containers = localProcess.localSearchContainers(squery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
// set statistic details of search result and find best result index set
if (containers == null) {
prop.putASIS("indexcount", "");
@ -234,12 +235,12 @@ public final class search {
indexContainer localResults =
(containers == null) ?
plasmaWordIndex.emptyContainer(null) :
localTiming.localSearchJoinExclude(
localProcess.localSearchJoinExclude(
containers[0].values(),
containers[1].values(),
(squery.queryHashes.size() == 0) ?
0 :
localTiming.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * squery.queryHashes.size() / (squery.queryHashes.size() + squery.excludeHashes.size()),
localProcess.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * squery.queryHashes.size() / (squery.queryHashes.size() + squery.excludeHashes.size()),
squery.maxDistance);
if (localResults == null) {
joincount = 0;
@ -248,10 +249,13 @@ public final class search {
} else {
joincount = localResults.size();
prop.putASIS("joincount", Integer.toString(joincount));
acc = localTiming.orderFinal(squery, rankingProfile, sb.wordIndex, true, localResults);
plasmaSearchPreOrder pre = localProcess.preSort(squery, rankingProfile, localResults);
acc = localProcess.urlFetch(squery, rankingProfile, sb.wordIndex, pre);
acc.localContributions = (localResults == null) ? 0 : localResults.size();
localProcess.postSort(true, acc);
localProcess.applyFilter(acc);
}
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific
// urls, because it is a re-search

View File

@ -1,11 +1,15 @@
// plasmaSearchEvent.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// Created: 10.10.2005
// plasmaSearchEvent.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -19,25 +23,6 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
@ -48,8 +33,6 @@ import java.util.Map;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
@ -195,7 +178,6 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
prefetchLocal(rcLocal, secondaryTimeout);
// this is temporary debugging code to learn that the index abstracts are fetched correctly
while (System.currentTimeMillis() < secondaryTimeout) {
@ -224,8 +206,13 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
searchResult.addAllUnique(rcContainers);
searchResult.sort();
searchResult.uniq(1000);
result = profileLocal.orderFinal(query, ranking, wordIndex, postsort, searchResult);
plasmaSearchPreOrder pre = profileLocal.preSort(query, ranking, searchResult);
result = profileLocal.urlFetch(query, ranking, wordIndex, pre);
result.localContributions = (rcLocal == null) ? 0 : rcLocal.size();
profileLocal.postSort(postsort, result);
profileLocal.applyFilter(result);
if (result != null) {
result.globalContributions = globalContributions;
@ -245,7 +232,12 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
result = profileLocal.orderFinal(query, ranking, wordIndex, postsort, rcLocal);
plasmaSearchPreOrder pre = profileLocal.preSort(query, ranking, rcLocal);
result = profileLocal.urlFetch(query, ranking, wordIndex, pre);
result.localContributions = (rcLocal == null) ? 0 : rcLocal.size();
profileLocal.postSort(postsort, result);
profileLocal.applyFilter(result);
result.globalContributions = 0;
}
@ -354,27 +346,6 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return wordlist;
}
/**
 * Walks the pre-ordered entries of a local result container and loads the
 * url entry of each one through {@code urlStore}; the loaded entries are not
 * used here, the call is made only to fill the LURL ram cache.
 *
 * @param rcLocal the local result container; when {@code null} the method
 *                returns immediately
 * @param timeout absolute point in time (compared against
 *                {@code System.currentTimeMillis()}) after which no further
 *                entries are loaded
 */
private void prefetchLocal(indexContainer rcLocal, long timeout) {
    // without a local container there is nothing to pre-fetch
    if (rcLocal == null) {
        return;
    }

    // the pre-order gets whatever time is left until the deadline
    final long remaining = timeout - System.currentTimeMillis();
    final plasmaSearchPreOrder presorted = new plasmaSearchPreOrder(query, ranking, rcLocal, remaining);
    if (presorted.filteredCount() > query.wantedResults) {
        presorted.remove(true, true);
    }

    // url-fetch: stop as soon as the entries run out or the deadline is reached
    try {
        while (presorted.hasNext() && System.currentTimeMillis() < timeout) {
            final indexRWIEntry rwi = (indexRWIEntry) (presorted.next()[0]);
            // find and fetch the url entry; the result is intentionally discarded
            urlStore.load(rwi.urlHash(), rwi);
        }
    } catch (kelondroException ee) {
        // a database failure ends the pre-fetch; it is logged and not propagated
        serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
    }
}
public void run() {
flushThreads.add(this); // this will care that the search event object is referenced from somewhere while it is still alive

View File

@ -1,4 +1,4 @@
// plasmaSearchProcess.java
// plasmaSearchProcessing.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 17.10.2005 on http://yacy.net
//
@ -276,6 +276,7 @@ public class plasmaSearchProcessing implements Cloneable {
// the processes
// collection
public Map[] localSearchContainers(
plasmaSearchQuery query,
plasmaWordIndex wordIndex,
@ -305,6 +306,7 @@ public class plasmaSearchProcessing implements Cloneable {
return new Map[]{inclusionContainers, exclusionContainers};
}
// join
public indexContainer localSearchJoinExclude(
Collection includeContainers,
Collection excludeContainers,
@ -329,11 +331,10 @@ public class plasmaSearchProcessing implements Cloneable {
return rcLocal;
}
public plasmaSearchPostOrder orderFinal(
// presort
public plasmaSearchPreOrder preSort(
plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaWordIndex wordIndex,
boolean postsort,
indexContainer resultIndex) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@ -353,6 +354,16 @@ public class plasmaSearchProcessing implements Cloneable {
setYieldTime(plasmaSearchProcessing.PROCESS_PRESORT);
setYieldCount(plasmaSearchProcessing.PROCESS_PRESORT, resultIndex.size());
return preorder;
}
// urlfetch
public plasmaSearchPostOrder urlFetch(
plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaWordIndex wordIndex,
plasmaSearchPreOrder preorder) {
// start url-fetch
long postorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_POSTSORT);
//System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);
@ -415,21 +426,33 @@ public class plasmaSearchProcessing implements Cloneable {
setYieldTime(plasmaSearchProcessing.PROCESS_URLFETCH);
setYieldCount(plasmaSearchProcessing.PROCESS_URLFETCH, acc.sizeFetched());
acc.filteredResults = preorder.filteredCount();
return acc;
}
//acc.localContributions = (resultIndex == null) ? 0 : resultIndex.size();
// postsort
/**
 * Post-sort stage of the search process: calls {@code acc.sortPages(postsort)}
 * between {@code startTimer()} and {@code setYieldTime(PROCESS_POSTSORT)}, then
 * reports {@code acc.sizeOrdered()} as the yield count of that stage.
 * Nothing is returned; any effect of the sort is on {@code acc} itself.
 *
 * @param postsort flag handed unchanged to {@code acc.sortPages(...)}
 *                 (NOTE(review): its exact meaning is defined in
 *                 plasmaSearchPostOrder, not visible here - confirm)
 * @param acc      the post-order accumulator to be sorted
 */
public void postSort(
boolean postsort,
plasmaSearchPostOrder acc) {
// start postsorting
// the timer must be started before the sort so that the yield time covers the sort call
startTimer();
acc.sortPages(postsort);
setYieldTime(plasmaSearchProcessing.PROCESS_POSTSORT);
// the count is read after sorting, so it is the size of the ordered result
setYieldCount(plasmaSearchProcessing.PROCESS_POSTSORT, acc.sizeOrdered());
}
// filter
/**
 * Filter stage of the search process: calls {@code acc.removeRedundant()}
 * between {@code startTimer()} and {@code setYieldTime(PROCESS_FILTER)}, then
 * reports {@code acc.sizeOrdered()} as the yield count of that stage.
 *
 * @param acc the post-order accumulator to be filtered
 */
public void applyFilter(
plasmaSearchPostOrder acc) {
// apply filter
// the timer must be started before the filter so that the yield time covers the filter call
startTimer();
acc.removeRedundant();
setYieldTime(plasmaSearchProcessing.PROCESS_FILTER);
setYieldCount(plasmaSearchProcessing.PROCESS_FILTER, acc.sizeOrdered());
// NOTE(review): the three statements below do not fit this method as declared:
// 'resultIndex' and 'preorder' are not parameters of applyFilter, and a void
// method cannot 'return acc'. They look like leftovers of the former
// orderFinal(...) body shown as removed lines in this diff view - confirm
// against the committed file before relying on them.
acc.localContributions = (resultIndex == null) ? 0 : resultIndex.size();
acc.filteredResults = preorder.filteredCount();
return acc;
}
}