From 74d1dea30ba7c962e9904b36b5519e5546b8650c Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 10 Sep 2006 22:36:47 +0000 Subject: [PATCH] changes towards better join-search - added generation of a compressed index within remote peers during global search - added selection of specific urls within remote peers during secondary global search git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2539 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControl_p.java | 6 +-- htroot/yacy/search.html | 3 +- htroot/yacy/search.java | 52 +++++++++++++++---- source/de/anomic/index/indexAbstractRI.java | 2 +- source/de/anomic/index/indexCollectionRI.java | 5 +- source/de/anomic/index/indexContainer.java | 5 +- source/de/anomic/index/indexRAMCacheRI.java | 10 +++- source/de/anomic/index/indexRI.java | 2 +- .../de/anomic/index/indexRowSetContainer.java | 51 ++++++++++++++++-- .../kelondro/kelondroRowCollection.java | 13 +++++ .../de/anomic/plasma/plasmaSearchEvent.java | 15 +++--- source/de/anomic/plasma/plasmaWordIndex.java | 25 ++++----- .../plasmaWordIndexAssortmentCluster.java | 14 ++--- .../plasma/plasmaWordIndexFileCluster.java | 6 +-- 14 files changed, 156 insertions(+), 53 deletions(-) diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 0015412dc..b83834922 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -149,7 +149,7 @@ public class IndexControl_p { if (delurl || delurlref) { // generate an urlx array indexContainer index = null; - index = switchboard.wordIndex.getContainer(keyhash, true, -1); + index = switchboard.wordIndex.getContainer(keyhash, null, true, -1); Iterator en = index.entries(); int i = 0; urlx = new String[index.size()]; @@ -252,7 +252,7 @@ public class IndexControl_p { indexContainer index; String result; long starttime = System.currentTimeMillis(); - index = switchboard.wordIndex.getContainer(keyhash, true, -1); + index = switchboard.wordIndex.getContainer(keyhash, null, true, -1); // built 
urlCache Iterator urlIter = index.entries(); HashMap knownURLs = new HashMap(); @@ -424,7 +424,7 @@ public class IndexControl_p { // search for a word hash and generate a list of url links indexContainer index = null; try { - index = switchboard.wordIndex.getContainer(keyhash, true, -1); + index = switchboard.wordIndex.getContainer(keyhash, null, true, -1); final StringBuffer result = new StringBuffer(1024); if (index.size() == 0) { diff --git a/htroot/yacy/search.html b/htroot/yacy/search.html index b6e8ec7c0..7e18b99a5 100644 --- a/htroot/yacy/search.html +++ b/htroot/yacy/search.html @@ -8,4 +8,5 @@ references=#[references]# joincount=#[joincount]# count=#[linkcount]# #[links]# -#[indexcount]# \ No newline at end of file +#[indexcount]# +#[indexabstract]# \ No newline at end of file diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 1db0bc86a..01bca65f4 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -49,6 +49,7 @@ import java.util.HashSet; import java.util.Iterator; +import java.util.Map; import java.util.Set; import de.anomic.http.httpHeader; @@ -81,7 +82,8 @@ public final class search { final String oseed = post.get("myseed", ""); // complete seed of the requesting peer // final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability final String key = post.get("key", ""); // transmission key for response - final String query = post.get("query", ""); // a string of word hashes + final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined + final String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned // final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results // final String fwden = post.get("fwden", ""); // forward deny, a list of seed hashes. 
They may NOT be target of forward hopping final long duetime= post.getLong("duetime", 3000); @@ -117,34 +119,64 @@ public final class search { yacyCore.log.logInfo("INIT HASH SEARCH: " + squery.queryHashes + " - " + squery.wantedResults + " links"); long timestamp1 = System.currentTimeMillis(); + + // prepare a search profile plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY}); plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults); plasmaSearchTimingProfile remoteTiming = null; - plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache); - Set containers = theSearch.localSearchContainers(); - indexContainer localResults = theSearch.localSearchJoin(containers); - int joincount = localResults.size(); - plasmaSearchResult acc = theSearch.order(localResults); - // set statistic details of search result - prop.put("joincount", Integer.toString(joincount)); + // retrieve index containers from search request + plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache); + Set urlselection = null; + if ((urls.length() > 0) && (urls.length() % 12 == 0)) { urlselection = new HashSet(); + for (int i = 0; i < (urls.length() / 12); i++) urlselection.add(urls.substring(i * 12, (i + 1) * 12)); + } + Map containers = theSearch.localSearchContainers(urlselection); + + // set statistic details of search result and find best result index set + String maxcounthash = null; if (containers == null) { prop.put("indexcount", ""); } else { - Iterator ci = containers.iterator(); + Iterator ci = containers.entrySet().iterator(); StringBuffer indexcount = new StringBuffer(); +
Map.Entry entry; + String wordhash; + int maxcount = -1; while (ci.hasNext()) { - indexContainer container = (indexContainer) ci.next(); + entry = (Map.Entry) ci.next(); + wordhash = (String) entry.getKey(); + indexContainer container = (indexContainer) entry.getValue(); + if (container.size() > maxcount) { maxcounthash = wordhash; maxcount = container.size(); } indexcount.append("indexcount.").append(container.getWordHash()).append('=').append(Integer.toString(container.size())).append(serverCore.crlfString); } prop.put("indexcount", new String(indexcount)); } + // generate compressed index for maxcounthash + // this is not needed if the search is restricted to specific urls, because it is a re-search + if ((maxcounthash == null) || (urls.length() != 0)) { + prop.put("indexabstract",""); + } else { + String indexabstract = "indexabstract." + maxcounthash + "=" + ((indexContainer) containers.get(maxcounthash)).compressedIndex(1000); + yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract); + prop.put("indexabstract", indexabstract); + } + // join and order the result + indexContainer localResults = theSearch.localSearchJoin(containers.values()); + int joincount = localResults.size(); + prop.put("joincount", Integer.toString(joincount)); + plasmaSearchResult acc = theSearch.order(localResults); + + // prepare result if ((joincount == 0) || (acc == null)) { + + // no results prop.put("links", ""); prop.put("linkcount", "0"); prop.put("references", ""); + } else { // result is a List of urlEntry elements diff --git a/source/de/anomic/index/indexAbstractRI.java b/source/de/anomic/index/indexAbstractRI.java index 58029f64b..1715f1a45 100644 --- a/source/de/anomic/index/indexAbstractRI.java +++ b/source/de/anomic/index/indexAbstractRI.java @@ -36,7 +36,7 @@ public abstract class indexAbstractRI implements indexRI { } public long getUpdateTime(String wordHash) { - indexContainer entries = getContainer(wordHash, false, -1); + indexContainer entries = getContainer(wordHash, null, false, -1); if
(entries == null) return 0; return entries.updated(); } diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java index 96951f073..0aca8b3df 100644 --- a/source/de/anomic/index/indexCollectionRI.java +++ b/source/de/anomic/index/indexCollectionRI.java @@ -108,10 +108,11 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI { } - public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime) { + public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) { try { kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty); - if (collection == null) return null; + if (collection != null) collection.select(urlselection); + if ((collection == null) || (collection.size() == 0)) return null; return new indexRowSetContainer(wordHash, collection); } catch (IOException e) { return null; diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java index 6ee66b062..4e47a9efe 100644 --- a/source/de/anomic/index/indexContainer.java +++ b/source/de/anomic/index/indexContainer.java @@ -32,6 +32,7 @@ import java.util.Iterator; import java.util.Set; import de.anomic.kelondro.kelondroOrder; +import de.anomic.server.serverByteBuffer; public interface indexContainer { @@ -43,7 +44,9 @@ public interface indexContainer { public void setWordHash(String newWordHash); public String getWordHash(); - + public serverByteBuffer compressedIndex(long maxtime); + public void select(Set urlselection); + public void setOrdering(kelondroOrder newOrder, int newColumn); public kelondroOrder order(); public int orderColumn(); diff --git a/source/de/anomic/index/indexRAMCacheRI.java b/source/de/anomic/index/indexRAMCacheRI.java index 770337259..39ba8e41b 100644 --- a/source/de/anomic/index/indexRAMCacheRI.java +++ b/source/de/anomic/index/indexRAMCacheRI.java @@ -386,8 +386,14 @@ public final class indexRAMCacheRI extends
indexAbstractRI implements indexRI { return (((long) intTime) * (long) 1000) + initTime; } - public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime_dummy) { - return (indexContainer) wCache.get(wordHash); + public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) { + if (urlselection == null) { + return (indexContainer) wCache.get(wordHash); + } else { + indexContainer ic = (indexContainer) wCache.get(wordHash); if (ic == null) return null; ic = ic.topLevelClone(); + ic.select(urlselection); + return ic; + } } public indexContainer deleteContainer(String wordHash) { diff --git a/source/de/anomic/index/indexRI.java b/source/de/anomic/index/indexRI.java index 3738fd11d..2cadc1352 100644 --- a/source/de/anomic/index/indexRI.java +++ b/source/de/anomic/index/indexRI.java @@ -53,7 +53,7 @@ public interface indexRI { public long getUpdateTime(String wordHash); - public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime); + public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime); public indexContainer deleteContainer(String wordHash); public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete); diff --git a/source/de/anomic/index/indexRowSetContainer.java b/source/de/anomic/index/indexRowSetContainer.java index bca8e0c81..194beb5c4 100644 --- a/source/de/anomic/index/indexRowSetContainer.java +++ b/source/de/anomic/index/indexRowSetContainer.java @@ -27,9 +27,11 @@ package de.anomic.index; import java.lang.reflect.Method; +import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.Set; +import java.util.Map; import java.util.TreeMap; import de.anomic.kelondro.kelondroBase64Order; @@ -37,6 +39,7 @@ import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroRow; import
de.anomic.kelondro.kelondroRowSet; +import de.anomic.server.serverByteBuffer; public class indexRowSetContainer extends kelondroRowSet implements indexContainer { @@ -64,6 +67,43 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain return newContainer; } + public serverByteBuffer compressedIndex(long maxtime) { + // collect references according to domains + long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; + TreeMap doms = new TreeMap(); + synchronized(this) { + Iterator i = entries(); + indexEntry iEntry; + String dom, paths; + while (i.hasNext()) { + iEntry = (indexEntry) i.next(); + dom = iEntry.urlHash().substring(6); + if ((paths = (String) doms.get(dom)) == null) { + doms.put(dom, iEntry.urlHash().substring(0, 6)); + } else { + doms.put(dom, paths + iEntry.urlHash().substring(0, 6)); + } + if (System.currentTimeMillis() > timeout) break; + } + } + // construct a result string + serverByteBuffer bb = new serverByteBuffer(this.size() * indexURLEntry.urlEntryRow.width(0) / 2); + bb.append('{'); + Iterator i = doms.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + bb.append((String) entry.getKey()); + bb.append(':'); + bb.append((String) entry.getValue()); + if (System.currentTimeMillis() > timeout) break; + if (i.hasNext()) bb.append(','); + } + bb.append('}'); + bb.trim(); + return bb; + } + public void setWordHash(String newWordHash) { this.wordHash = newWordHash; } @@ -94,15 +134,18 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain public int add(indexContainer c, long maxTime) { // returns the number of new elements - long startTime = System.currentTimeMillis(); + long timeout = (maxTime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxTime; if (c == null) return 0; int x = 0; synchronized (c) { Iterator i = c.entries(); - while ((i.hasNext()) && ((maxTime < 0) || ((startTime + maxTime) > System.currentTimeMillis()))) { + while (i.hasNext()) { try { if (addi((indexEntry) i.next())) x++; - } catch (ConcurrentModificationException e) {} + } catch (ConcurrentModificationException e) { + e.printStackTrace(); + } + if (System.currentTimeMillis() > timeout) break; } } this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated()); @@ -202,7 +245,7 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain return c; } - public static indexContainer joinContainer(Set containers, long time, int maxDistance) { + public static indexContainer joinContainer(Collection containers, long time, int maxDistance) { long stamp = System.currentTimeMillis(); diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index cf32c2227..d0d62a940 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -25,6 +25,7 @@ package de.anomic.kelondro; import java.util.Iterator; +import java.util.Set; public class kelondroRowCollection { @@ -293,6 +294,18 @@ public class kelondroRowCollection { } } + public void select(Set keys) { + // removes all entries but the ones given by urlselection + if (keys == null) return; + synchronized (this) { + Iterator i = rows(); + kelondroRow.Entry row; + while (i.hasNext()) { + row = (kelondroRow.Entry) i.next(); + if (!(keys.contains(row.getColString(0, null)))) i.remove(); + } + } + } protected final void sort(kelondroOrder newOrder, int newColumn) { if ((this.sortOrder == null) || diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 5a40e0450..384c54e32 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ 
b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -42,9 +42,11 @@ package de.anomic.plasma; +import java.util.Collection; import java.util.Iterator; -import java.util.Set; +import java.util.Map; import java.util.HashSet; +import java.util.Set; import de.anomic.kelondro.kelondroException; import de.anomic.server.logging.serverLog; @@ -131,7 +133,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { searchThreads = yacySearch.searchHashes(query.queryHashes, query.prefer, query.urlMask, query.maxDistance, urlStore, rcGlobal, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking); // meanwhile do a local search - indexContainer rcLocal = localSearchJoin(localSearchContainers()); + indexContainer rcLocal = localSearchJoin(localSearchContainers(null).values()); plasmaSearchResult localResult = orderLocal(rcLocal, timeout); // catch up global results: @@ -161,7 +163,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { lastEvent = this; return result; } else { - indexContainer rcLocal = localSearchJoin(localSearchContainers()); + indexContainer rcLocal = localSearchJoin(localSearchContainers(null).values()); plasmaSearchResult result = order(rcLocal); result.localContributions = rcLocal.size(); @@ -173,13 +175,14 @@ public final class plasmaSearchEvent extends Thread implements Runnable { } } - public Set localSearchContainers() { + public Map localSearchContainers(Set urlselection) { // search for the set of hashes and return the set of containers containing the seach result // retrieve entities that belong to the hashes profileLocal.startTimer(); - Set containers = wordIndex.getContainers( + Map containers = wordIndex.getContainers( query.queryHashes, + urlselection, true, true, profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION)); @@ -190,7 +193,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { return containers; } - public indexContainer 
localSearchJoin(Set containers) { + public indexContainer localSearchJoin(Collection containers) { // join a search result and return the joincount (number of pages after join) // since this is a conjunction we return an empty entity if any word is not known diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 71f456475..89d77761e 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -49,6 +49,7 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.HashSet; @@ -321,11 +322,11 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { return condenser.RESULT_SIMI_WORDS; } - public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { + public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) { long start = System.currentTimeMillis(); // get from cache - indexContainer container = ramCache.getContainer(wordHash, true, -1); + indexContainer container = ramCache.getContainer(wordHash, urlselection, true, -1); // We must not use the container from cache to store everything we find, // as that container remains linked to in the cache and might be changed later @@ -336,18 +337,18 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { // get from collection index if (useCollectionIndex) { if (container == null) { - container = collections.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime); + container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime); } else { - container.add(collections.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1); + container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? 
-1 : maxTime), -1); } } // get from assortments if (container == null) { - container = assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime); + container = assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime); } else { // add containers from assortment cluster - container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1); + container.add(assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1); } // get from backend @@ -355,14 +356,14 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { maxTime = maxTime - (System.currentTimeMillis() - start); if (maxTime < 0) maxTime = 100; } - container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1); + container.add(backend.getContainer(wordHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1); return container; } - public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) { + public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) { // retrieve entities that belong to the hashes - HashSet containers = new HashSet(); + HashMap containers = new HashMap(); String singleHash; indexContainer singleContainer; Iterator i = wordHashes.iterator(); @@ -378,12 +379,12 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { singleHash = (String) i.next(); // retrieve index - singleContainer = getContainer(singleHash, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size())); + singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? 
-1 : remaining / (wordHashes.size() - containers.size())); // check result - if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashSet(); + if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap(); - containers.add(singleContainer); + containers.put(singleHash, singleContainer); } return containers; } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 373b78378..51e114dfe 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -295,17 +295,17 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl return initialSize - urlHashes.size(); } - public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { + public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) { // collect all records from all the assortments and return them indexContainer buffer, record = new indexRowSetContainer(wordHash); - long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; - long remainingTime; + long timeout = (maxTime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxTime; for (int i = 0; i < clusterCount; i++) { buffer = assortments[i].get(wordHash); - remainingTime = limitTime - System.currentTimeMillis(); - if (0 > remainingTime) break; - if (buffer != null) record.add(buffer, remainingTime); - + if (buffer != null) { + buffer.select(urlselection); + record.add(buffer, -1); + } + if (System.currentTimeMillis() > timeout) break; } return record; } diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index 7132b1fd6..dac96ddac 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -99,7 +99,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index } public Object next() { - return getContainer((String) wordIterator.next(), true, 100); + return getContainer((String) wordIterator.next(), null, true, 100); } public void remove() { @@ -225,7 +225,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index } } - public synchronized indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { + public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) { long start = System.currentTimeMillis(); if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute if (plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists()) { @@ -235,7 +235,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index Iterator i = entity.elements(true); while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) { entry = (indexEntry) i.next(); - container.add(entry); + if ((urlselection == null) || (urlselection.contains(entry.urlHash()))) container.add(entry); } return container; } else {