added new methods to count the number of objects in RWIs. lots of refactoring was necessary to introduce new Rating class and to unify naming of methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7896 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2011-08-25 10:35:25 +00:00
parent 75df87832c
commit 2c595a6a47
26 changed files with 704 additions and 339 deletions

View File

@ -305,7 +305,7 @@ public class IndexControlRWIs_p {
// generate list
if (post.containsKey("keyhashsimilar")) try {
final Iterator<ReferenceContainer<WordReference>> containerIt = segment.termIndex().references(keyhash, true, 256, false).iterator();
final Iterator<ReferenceContainer<WordReference>> containerIt = segment.termIndex().referenceContainer(keyhash, true, 256, false).iterator();
ReferenceContainer<WordReference> container;
i = 0;
int rows = 0, cols = 0;

View File

@ -470,7 +470,7 @@ public class Segment {
DigestURI url = null;
final HandleSet urlHashs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
try {
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.references(this.startHash, false, 100, false).iterator();
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.referenceContainer(this.startHash, false, 100, false).iterator();
while (indexContainerIterator.hasNext() && this.run) {
waiter();
container = indexContainerIterator.next();
@ -503,7 +503,7 @@ public class Segment {
if (!containerIterator.hasNext()) {
// We may not be finished yet, try to get the next chunk of wordHashes
final TreeSet<ReferenceContainer<WordReference>> containers = Segment.this.termIndex.references(container.getTermHash(), false, 100, false);
final TreeSet<ReferenceContainer<WordReference>> containers = Segment.this.termIndex.referenceContainer(container.getTermHash(), false, 100, false);
indexContainerIterator = containers.iterator();
// Make sure we don't get the same wordhash twice, but don't skip a word
if ((indexContainerIterator.hasNext()) && (!container.getTermHash().equals(indexContainerIterator.next().getTermHash()))) {

View File

@ -168,7 +168,7 @@ public class Dispatcher {
final ArrayList<ReferenceContainer<WordReference>> containers = new ArrayList<ReferenceContainer<WordReference>>(maxContainerCount);
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = this.segment.termIndex().references(hash, true, ram);
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = this.segment.termIndex().referenceContainerIterator(hash, true, ram);
ReferenceContainer<WordReference> container;
int refcount = 0;

View File

@ -0,0 +1,77 @@
/**
* AbstractOrder
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 25.08.2011 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.ranking;
/**
 * Skeleton implementation of {@link Order}.
 * It keeps the rotation zero point and the direction flag, derives a partition
 * number from the cardinal value, and defines equality of order objects through
 * their signature. Subclasses provide the actual comparison, the cardinal
 * function and the signature.
 *
 * @param <A> the type of the elements that are ordered
 */
public abstract class AbstractOrder<A> implements Order<A> {

    /** zero point set by {@link #rotate(Object)}; null as long as no rotation was defined */
    protected A zero = null;

    /** direction flag set by {@link #direction(boolean)}; this class only stores it */
    protected boolean asc = true;

    @Override
    abstract public Order<A> clone();

    /**
     * @return the zero point of the ordering; null if not defined
     */
    public A zero() {
        return this.zero;
    }

    /**
     * Stores the ordering direction.
     * @param ascending the new value of the direction flag
     */
    public void direction(final boolean ascending) {
        this.asc = ascending;
    }

    /**
     * Computes a partition number for a key by dividing its cardinal value
     * by the width of one partition.
     * @param key the element to be assigned to a partition
     * @param forks the number of partitions; a value of 0 causes an ArithmeticException
     * @return cardinal(key) divided by the partition width
     */
    public long partition(final A key, final int forks) {
        // d == floor((Long.MAX_VALUE + 1) / forks), written so that the sum cannot overflow
        final long d = (Long.MAX_VALUE / forks) + ((Long.MAX_VALUE % forks) + 1) / forks;
        return cardinal(key) / d;
    }

    /**
     * Sets the zero point for a rotating order.
     * @param newzero the new zero point
     */
    public void rotate(final A newzero) {
        this.zero = newzero;
    }

    /**
     * Two order objects are considered equal if both have a non-null signature
     * and the signatures are equal; an object is always equal to itself.
     * @param obj the object to compare with
     * @return true if obj is this object or an Order with the same signature
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) return true;
        if (obj == null) return false;
        if (!(obj instanceof Order<?>)) return false;
        // only the signature is read, so the element type of the other order is irrelevant
        final Order<?> other = (Order<?>) obj;
        final String thisSig = signature();
        final String otherSig = other.signature();
        if ((thisSig == null) || (otherSig == null)) return false;
        return thisSig.equals(otherSig);
    }

    /**
     * Hash code derived from the signature so that it is consistent with equals.
     * @return the hash code of the signature, or 0 if the signature is null
     */
    @Override
    public int hashCode() {
        // equals() tolerates a null signature, so hashCode() must not fail on it either
        final String sig = signature();
        return (sig == null) ? 0 : sig.hashCode();
    }

    /**
     * @param a first element
     * @param b second element
     * @return b if compare(a, b) is greater than zero, otherwise a
     */
    public A smallest(final A a, final A b) {
        return (compare(a, b) > 0) ? b : a;
    }

    /**
     * @param a first element
     * @param b second element
     * @return a if compare(a, b) is greater than zero, otherwise b
     */
    public A largest(final A a, final A b) {
        return (compare(a, b) > 0) ? a : b;
    }
}

View File

@ -0,0 +1,85 @@
/**
* Order
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 25.08.2011 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.ranking;
import java.util.Comparator;
/**
 * A {@link Comparator} that additionally offers a direction switch, a signature,
 * a cardinal mapping, partitioning and an optional rotation zero point.
 *
 * @param <A> the type of the elements that are ordered
 */
public interface Order<A> extends Comparator<A> {

    /**
     * Tests whether an element fits into this order.
     * @param a the element to test
     * @return true if and only if a has only characters that belong to the implemented order
     */
    boolean wellformed(A a);

    /**
     * @return an order object for the same element type, as defined by the implementation
     */
    Order<A> clone();

    /**
     * Changes the ordering direction; this can be done at any time.
     * @param ascending the new direction
     */
    void direction(boolean ascending);

    /**
     * @return a signature String; different orderings have different signatures
     */
    String signature();

    /**
     * Computes a number for the given key and fork count; the exact meaning
     * is defined by the implementation.
     * @param key the element
     * @param forkes the fork count
     * @return the number computed for the key
     */
    long partition(A key, int forkes);

    /**
     * @param key the element
     * @return a cardinal number in the range of 0 .. Long.MAX_VALUE
     */
    long cardinal(A key);

    int compare(A a, A b);

    boolean equal(A a, A b);

    /**
     * @return the zero point of the ordering; null if not defined
     */
    A zero();

    /**
     * Declares the ordering as rotating and sets the zero point for the rotation.
     * @param zero the zero point
     */
    void rotate(A zero);

    /**
     * Compares order objects with each other; different objects may define the same ordering.
     */
    @Override
    boolean equals(Object o);

    @Override
    int hashCode();
}

View File

@ -0,0 +1,82 @@
/**
* Rating
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 25.08.2011 at http://yacy.net
*
* $LastChangedDate: 2011-03-08 02:51:51 +0100 (Di, 08 Mrz 2011) $
* $LastChangedRevision: 7567 $
* $LastChangedBy: low012 $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.ranking;
import java.util.Comparator;
public class Rating<A> {
private final A object;
private long score;
public Rating(final A o, final long score) {
this.object = o;
this.score = score;
}
public void setScore(final long score) {
this.score = score;
}
public long getScore() {
return this.score;
}
public A getObject() {
return this.object;
}
@SuppressWarnings("rawtypes")
public final static ObjectComparator<?> objectComparator = new ObjectComparator();
public final static ScoreComparator scoreComparator = new ScoreComparator();
public static class ObjectComparator<B> implements Comparator<Rating<B>> {
@SuppressWarnings("unchecked")
public int compare(final Rating<B> arg0, final Rating<B> arg1) {
if (!(arg0 instanceof Comparable<?>)) throw new UnsupportedOperationException("object class must implement comparable");
return ((Comparable<B>) arg0.getObject()).compareTo(arg1.getObject());
}
}
public static class ScoreComparator implements Comparator<Rating<?>> {
public int compare(final Rating<?> arg0, final Rating<?> arg1) {
if (arg0.getScore() < arg1.getScore()) return -1;
if (arg0.getScore() > arg1.getScore()) return 1;
return 0;
}
}
public static class FoldedScoreComparator<B extends Comparable<B>> implements Comparator<Rating<B>> {
public int compare(final Rating<B> arg0, final Rating<B> arg1) {
final int c = scoreComparator.compare(arg0, arg1);
if (c != 0) return c;
return arg0.getObject().compareTo(arg1.getObject());
}
}
}

View File

@ -0,0 +1,66 @@
// RatingOrder.java
// -----------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://yacy.net
// Frankfurt, Germany, 2011
// created 25.08.2011
//
// $LastChangedDate: 2011-03-08 02:51:51 +0100 (Di, 08 Mrz 2011) $
// $LastChangedRevision: 7567 $
// $LastChangedBy: low012 $
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.ranking;
/**
 * An order on {@link Rating} objects that compares the rated objects with a
 * given order on the object type. The score of a rating is not used for the
 * comparison; it is only reported as the cardinal value.
 *
 * @param <A> the type of the rated objects
 */
public class RatingOrder<A> extends AbstractOrder<Rating<A>> implements Order<Rating<A>> {

    /** the order that is applied to the rated objects */
    Order<A> ordering;

    /**
     * @param ordering the order used to compare the objects inside the ratings
     */
    public RatingOrder(final Order<A> ordering) {
        this.ordering = ordering;
    }

    /**
     * @param a first rating
     * @param b second rating
     * @return the result of comparing the object of a with the object of b
     */
    public int compare(final Rating<A> a, final Rating<A> b) {
        return this.ordering.compare(a.getObject(), b.getObject());
    }

    /**
     * @param a the rating to check
     * @return always true
     */
    @Override
    public boolean wellformed(final Rating<A> a) {
        return true;
    }

    /**
     * @return the constant signature "RA"
     */
    @Override
    public String signature() {
        return "RA";
    }

    /**
     * @param key the rating
     * @return the score of the rating
     */
    @Override
    public long cardinal(final Rating<A> key) {
        return key.getScore();
    }

    /**
     * @param a first rating
     * @param b second rating
     * @return true if the objects of both ratings compare as equal in the underlying order
     */
    @Override
    public boolean equal(final Rating<A> a, final Rating<A> b) {
        // a comparison result of 0 means equality; testing for 1 would report "a greater than b"
        return this.ordering.compare(a.getObject(), b.getObject()) == 0;
    }

    /**
     * Creates an independent copy, so that rotating or changing the direction
     * of the copy does not alter this order.
     * @return a new RatingOrder with a clone of the underlying order and the same zero point and direction
     */
    @Override
    public Order<Rating<A>> clone() {
        final Order<A> orderingCopy = (this.ordering == null) ? null : this.ordering.clone();
        final RatingOrder<A> copy = new RatingOrder<A>(orderingCopy);
        copy.zero = this.zero;
        copy.asc = this.asc;
        return copy;
    }
}

View File

@ -37,13 +37,13 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.ranking.AbstractOrder;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.AbstractOrder;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.order.Order;
import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.kelondroException;

View File

@ -1,79 +0,0 @@
// AbstractOrder.java
// -----------------------
// part of The Kelondro Database
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// created 29.12.2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.order;
/**
 * Common base for {@link Order} implementations: holds the zero point and the
 * direction flag, computes partitions from the cardinal value and compares
 * order objects by their signature.
 *
 * @param <A> the type of the elements that are ordered
 */
public abstract class AbstractOrder<A> implements Order<A> {

    /** zero point of a rotating order; stays null until rotate is called */
    protected A zero = null;

    /** direction flag; only stored here, written by direction(boolean) */
    protected boolean asc = true;

    @Override
    abstract public Order<A> clone();

    /**
     * @return the zero point, or null if none was set
     */
    public A zero() {
        return zero;
    }

    /**
     * @param ascending the direction flag to store
     */
    public void direction(final boolean ascending) {
        asc = ascending;
    }

    /**
     * Divides the cardinal value of the key by the width of a partition.
     * @param key the element to be assigned to a partition
     * @param forks the number of partitions; 0 leads to an ArithmeticException
     * @return cardinal(key) divided by the partition width
     */
    public long partition(final A key, final int forks) {
        // equals floor((Long.MAX_VALUE + 1) / forks) without overflowing a long
        final long d = (Long.MAX_VALUE / forks) + ((Long.MAX_VALUE % forks) + 1) / forks;
        return cardinal(key) / d;
    }

    /**
     * @param newzero the zero point for the rotation
     */
    public void rotate(final A newzero) {
        this.zero = newzero;
    }

    /**
     * Order objects are equal if they are the same object or if both carry
     * the same non-null signature.
     * @param obj the object to compare with
     * @return true if obj is considered the same ordering
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) return true;
        if (obj == null) return false;
        if (!(obj instanceof Order<?>)) return false;
        // a wildcard cast is sufficient because only signature() is called
        final Order<?> other = (Order<?>) obj;
        final String thisSig = this.signature();
        final String otherSig = other.signature();
        if ((thisSig == null) || (otherSig == null)) return false;
        return thisSig.equals(otherSig);
    }

    /**
     * @return the hash code of the signature, or 0 if there is no signature
     */
    @Override
    public int hashCode() {
        // a null signature is accepted by equals(), so it must not cause an exception here
        final String sig = this.signature();
        return (sig == null) ? 0 : sig.hashCode();
    }

    /**
     * @param a first element
     * @param b second element
     * @return b if compare(a, b) is greater than zero, otherwise a
     */
    public A smallest(A a, A b) {
        return (compare(a, b) > 0) ? b : a;
    }

    /**
     * @param a first element
     * @param b second element
     * @return a if compare(a, b) is greater than zero, otherwise b
     */
    public A largest(A a, A b) {
        return (compare(a, b) > 0) ? a : b;
    }
}

View File

@ -30,6 +30,8 @@ package net.yacy.kelondro.order;
import java.util.Comparator;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.ranking.AbstractOrder;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;

View File

@ -27,6 +27,7 @@
package net.yacy.kelondro.order;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;

View File

@ -29,6 +29,7 @@ import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.logging.Log;

View File

@ -29,6 +29,8 @@ package net.yacy.kelondro.order;
import java.util.Comparator;
import java.util.Iterator;
import net.yacy.cora.ranking.AbstractOrder;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;

View File

@ -1,58 +0,0 @@
// Order.java
// -----------------------
// part of The Kelondro Database
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// created 29.12.2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.order;
import java.util.Comparator;
/**
 * A {@link Comparator} with additional operations for direction, signature,
 * cardinal mapping, partitioning and rotation.
 *
 * @param <A> the type of the elements that are ordered
 */
public interface Order<A> extends Comparator<A> {

    /**
     * @param a the element to test
     * @return true if and only if a has only characters that belong to the implemented order
     */
    boolean wellformed(A a);

    Order<A> clone();

    /**
     * The ordering direction can be changed at any time.
     * @param ascending the new direction
     */
    void direction(boolean ascending);

    /**
     * @return a signature String so that different orderings have different signatures
     */
    String signature();

    long partition(A key, int forkes);

    /**
     * @param key the element
     * @return a cardinal number in the range of 0 .. Long.MAX_VALUE
     */
    long cardinal(A key);

    int compare(A a, A b);

    boolean equal(A a, A b);

    /**
     * @return the zero point of the Ordering; null if not defined
     */
    A zero();

    /**
     * Defines that the ordering rotates, and sets the zero point for the rotation.
     * @param zero the zero point
     */
    void rotate(A zero);

    /**
     * Used to compare different order objects; they may define the same ordering.
     */
    @Override
    boolean equals(Object o);

    @Override
    int hashCode();
}

View File

@ -30,6 +30,7 @@ package net.yacy.kelondro.order;
import java.util.Comparator;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.ranking.Order;
public class StringOrder implements Comparator<String> {

View File

@ -10,7 +10,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -31,8 +31,7 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;
import net.yacy.kelondro.order.Order;
import net.yacy.cora.ranking.Order;
public abstract class AbstractBufferedIndex<ReferenceType extends Reference> extends AbstractIndex<ReferenceType> implements BufferedIndex<ReferenceType> {
@ -40,16 +39,16 @@ public abstract class AbstractBufferedIndex<ReferenceType extends Reference> ext
public AbstractBufferedIndex(final ReferenceFactory<ReferenceType> factory) {
super(factory);
}
public synchronized TreeSet<ReferenceContainer<ReferenceType>> references(byte[] startHash, final boolean rot, int count, boolean ram) throws IOException {
public synchronized TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(byte[] startHash, final boolean rot, int count, final boolean ram) throws IOException {
// creates a set of indexContainers
// this does not use the cache
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.termKeyOrdering().clone());
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, termKeyOrdering().clone());
if (startHash != null && startHash.length == 0) startHash = null;
ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startHash);
final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(this.factory, startHash);
containerOrder.rotate(emptyContainer);
final TreeSet<ReferenceContainer<ReferenceType>> containers = new TreeSet<ReferenceContainer<ReferenceType>>(containerOrder);
final Iterator<ReferenceContainer<ReferenceType>> i = references(startHash, rot, ram);
final Iterator<ReferenceContainer<ReferenceType>> i = referenceContainerIterator(startHash, rot, ram);
if (ram) count = Math.min(size(), count);
ReferenceContainer<ReferenceType> container;
// this loop does not terminate using the i.hasNext() predicate when rot == true

View File

@ -10,7 +10,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -32,44 +32,44 @@ import java.util.Iterator;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Order;
public abstract class AbstractIndex <ReferenceType extends Reference> implements Index<ReferenceType> {
final protected ReferenceFactory<ReferenceType> factory;
public AbstractIndex(final ReferenceFactory<ReferenceType> factory) {
this.factory = factory;
}
/**
* merge this index with another index
* @param otherIndex
* @throws IOException
* @throws RowSpaceExceededException
* @throws IOException
* @throws RowSpaceExceededException
*/
public void merge(Index<ReferenceType> otherIndex) throws IOException, RowSpaceExceededException {
public void merge(final Index<ReferenceType> otherIndex) throws IOException, RowSpaceExceededException {
byte[] term;
for (ReferenceContainer<ReferenceType> otherContainer: otherIndex) {
for (final ReferenceContainer<ReferenceType> otherContainer: otherIndex) {
term = otherContainer.getTermHash();
synchronized (this) {
ReferenceContainer<ReferenceType> container = this.get(term, null);
final ReferenceContainer<ReferenceType> container = get(term, null);
if (container == null) {
this.add(otherContainer);
} else {
container.merge(otherContainer);
this.delete(term); // in some file-based environments we cannot just change the container
delete(term); // in some file-based environments we cannot just change the container
this.add(container);
}
}
}
}
public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException {
// remove the same url hashes for multiple words
// this is mainly used when correcting an index after a search
@ -78,7 +78,7 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
removeDelayed(i.next(), urlHashBytes);
}
}
public int remove(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException {
// remove the same url hashes for multiple words
// this is mainly used when correcting an index after a search
@ -89,15 +89,15 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
}
return c;
}
public synchronized TreeSet<ReferenceContainer<ReferenceType>> references(final byte[] startHash, final boolean rot, int count) throws IOException {
public synchronized TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(final byte[] startHash, final boolean rot, int count) throws IOException {
// creates a set of indexContainers
// this does not use the cache
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.termKeyOrdering().clone());
final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startHash);
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, termKeyOrdering().clone());
final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(this.factory, startHash);
containerOrder.rotate(emptyContainer);
final TreeSet<ReferenceContainer<ReferenceType>> containers = new TreeSet<ReferenceContainer<ReferenceType>>(containerOrder);
final Iterator<ReferenceContainer<ReferenceType>> i = references(startHash, rot);
final Iterator<ReferenceContainer<ReferenceType>> i = referenceContainerIterator(startHash, rot);
//if (ram) count = Math.min(size(), count);
ReferenceContainer<ReferenceType> container;
// this loop does not terminate using the i.hasNext() predicate when rot == true
@ -113,10 +113,10 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
}
return containers; // this may return less containers as demanded
}
// methods to search in the index
/**
* collect containers for given word hashes.
* This collection stops if a single container does not contain any references.
@ -139,26 +139,26 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
ReferenceContainer<ReferenceType> singleContainer;
final Iterator<byte[]> i = wordHashes.iterator();
while (i.hasNext()) {
// get next word hash:
singleHash = i.next();
// retrieve index
try {
singleContainer = this.get(singleHash, urlselection);
} catch (IOException e) {
singleContainer = get(singleHash, urlselection);
} catch (final IOException e) {
Log.logException(e);
continue;
}
// check result
if ((singleContainer == null || singleContainer.isEmpty())) return new TreeMap<byte[], ReferenceContainer<ReferenceType>>(Base64Order.enhancedCoder);
containers.put(singleHash, singleContainer);
}
return containers;
}
/**
* collect containers for given word hashes and join them as they are retrieved.
* This collection stops if a single container does not contain any references
@ -168,39 +168,39 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
* @param urlselection
* @param maxDistance the maximum distance that the words in the result may have
* @return ReferenceContainer the join result
* @throws RowSpaceExceededException
* @throws RowSpaceExceededException
*/
public ReferenceContainer<ReferenceType> searchJoin(final HandleSet wordHashes, final HandleSet urlselection, final int maxDistance) throws RowSpaceExceededException {
// first check if there is any entry that has no match;
// this uses only operations in ram
for (byte[] wordHash: wordHashes) {
if (!this.has(wordHash)) return ReferenceContainer.emptyContainer(factory, null, 0);
for (final byte[] wordHash: wordHashes) {
if (!has(wordHash)) return ReferenceContainer.emptyContainer(this.factory, null, 0);
}
// retrieve entities that belong to the hashes
ReferenceContainer<ReferenceType> resultContainer = null;
ReferenceContainer<ReferenceType> singleContainer;
for (byte[] wordHash: wordHashes) {
for (final byte[] wordHash: wordHashes) {
// retrieve index
try {
singleContainer = this.get(wordHash, urlselection);
} catch (IOException e) {
singleContainer = get(wordHash, urlselection);
} catch (final IOException e) {
Log.logException(e);
continue;
}
// check result
if ((singleContainer == null || singleContainer.isEmpty())) return ReferenceContainer.emptyContainer(factory, null, 0);
if ((singleContainer == null || singleContainer.isEmpty())) return ReferenceContainer.emptyContainer(this.factory, null, 0);
if (resultContainer == null) resultContainer = singleContainer; else {
resultContainer = ReferenceContainer.joinConstructive(factory, resultContainer, singleContainer, maxDistance);
resultContainer = ReferenceContainer.joinConstructive(this.factory, resultContainer, singleContainer, maxDistance);
}
// finish if the result is empty
if (resultContainer.isEmpty()) return resultContainer;
}
return resultContainer;
}
public TermSearch<ReferenceType> query(
final HandleSet queryHashes,
final HandleSet excludeHashes,

View File

@ -10,7 +10,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -50,7 +50,7 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
/*
* methods for monitoring of the buffer
*/
/**
* set the size of the buffer, which can be defined with a given maximum number
* of words that shall be stored. Because an arbitrary number of references can
@ -60,7 +60,7 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
* limit.
*/
public void setBufferMaxWordCount(final int maxWords);
/**
* return the maximum number of references, that one buffer entry has stored
* @return
@ -78,7 +78,7 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
* @return a time as milliseconds from epoch
*/
public long getBufferMaxAge();
/**
* calculate the memory that is taken by the buffer.
* This does not simply return a variable content. it is necessary
@ -93,7 +93,7 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
* @return number of word references
*/
public int getBufferSize();
/**
* iterate over entries in index. this method differs from the iterator in an Index
* object in such a way that it has the additional 'buffer' flag. When using this method,
@ -105,12 +105,12 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
* @return
* @throws IOException
*/
public CloneableIterator<ReferenceContainer<ReferenceType>> references(
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(
byte[] startHash,
boolean rot,
boolean buffer
) throws IOException;
/**
* collect reference container in index. this method differs from the collector in an Index
@ -124,11 +124,11 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
* @return
* @throws IOException
*/
public TreeSet<ReferenceContainer<ReferenceType>> references(
public TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(
byte[] startHash,
boolean rot,
int count,
boolean buffer
) throws IOException;
}

View File

@ -10,7 +10,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -32,6 +32,7 @@ import java.io.IOException;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.cora.ranking.Rating;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.order.ByteOrder;
@ -39,19 +40,19 @@ import net.yacy.kelondro.order.CloneableIterator;
public interface Index <ReferenceType extends Reference> extends Iterable<ReferenceContainer<ReferenceType>> {
/**
* every index entry is made for a term which has a fixed size
* @return the size of the term
*/
public int termKeyLength();
/**
* merge this index with another index
* @param otherIndex
*/
public void merge(Index<ReferenceType> otherIndex) throws IOException, RowSpaceExceededException;
/**
* add references to the reverse index
* if no references to the word are stored, the new Entries are added,
@ -59,7 +60,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* reference to be stored, then the old and the new references are merged
* @param newEntries the References to be merged with existing references
* @throws IOException
* @throws RowSpaceExceededException
* @throws RowSpaceExceededException
*/
public void add(ReferenceContainer<ReferenceType> newEntries) throws IOException, RowSpaceExceededException;
@ -71,17 +72,17 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @param termHash
* @param entry
* @throws IOException
* @throws RowSpaceExceededException
* @throws RowSpaceExceededException
*/
public void add(final byte[] termHash, final ReferenceType entry) throws IOException, RowSpaceExceededException;
/**
* check if there are references stored to the given word hash
* @param termHash
* @return true if references exist, false if not
*/
public boolean has(final byte[] termHash); // should only be used if in case that true is returned the getContainer is NOT called
/**
* count the number of references for the given word
* do not use this method to check the existence of a reference by comparing
@ -90,7 +91,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @return the number of references to the given word
*/
public int count(final byte[] termHash);
/**
* get the references to a given word.
* if referenceselection is not null, then all url references which are not
@ -101,7 +102,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @throws IOException
*/
public ReferenceContainer<ReferenceType> get(byte[] termHash, HandleSet referenceselection) throws IOException;
/**
* delete all references for a word
* @param termHash
@ -109,7 +110,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @throws IOException
*/
public ReferenceContainer<ReferenceType> delete(byte[] termHash) throws IOException;
/**
* remove a specific reference entry
* @param termHash
@ -119,7 +120,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
*/
public boolean remove(byte[] termHash, byte[] referenceHash) throws IOException;
public void removeDelayed(byte[] termHash, byte[] referenceHash) throws IOException;
/**
* remove a set of reference entries for a given word
* @param termHash the key for the references
@ -133,6 +134,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException;
public void removeDelayed() throws IOException;
/**
* iterate all references from the beginning of a specific word hash
* @param startHash
@ -141,13 +143,26 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @return
* @throws IOException
*/
public CloneableIterator<ReferenceContainer<ReferenceType>> references(
byte[] startHash,
boolean rot
) throws IOException;
public CloneableIterator<Rating<byte[]>> referenceCountIterator(
byte[] startHash,
boolean rot
) throws IOException;
public TreeSet<ReferenceContainer<ReferenceType>> references(
/**
* iterate all references from the beginning of a specific word hash
* @param startHash
* @param rot if true, then rotate at the end to the beginning
* @param ram
* @return
* @throws IOException
*/
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(
byte[] startHash,
boolean rot
) throws IOException;
public TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(
byte[] startHash,
boolean rot,
int count
@ -160,31 +175,31 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @param urlselection
* @return map of wordhash:indexContainer
*/
public TreeMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection);
public TreeMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection);
/**
* delete all references entries
* @throws IOException
*/
public void clear() throws IOException;
/**
* close the reverse index
*/
public void close();
/**
* the number of all references
* @return the number of all references
*/
public int size();
/**
* calculate needed memory
* @return the memory needed to operate the object
*/
public int minMem();
/**
* return the order that is used for the storage of the word hashes
* @return

View File

@ -32,6 +32,9 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.cora.ranking.Order;
import net.yacy.cora.ranking.Rating;
import net.yacy.cora.ranking.RatingOrder;
import net.yacy.cora.storage.ComparableARC;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.index.HandleSet;
@ -40,7 +43,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.order.MergeIterator;
import net.yacy.kelondro.order.Order;
import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.MemoryControl;
@ -447,16 +449,32 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
return references(null, false);
return referenceContainerIterator(null, false);
}
public CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] starttermHash, final boolean rot) {
/**
 * Builds an iterator of (term hash, reference count) ratings over this cell,
 * starting at the given term hash.
 * The result is a two-level merge: the RAM cache iterator (honouring {@code rot})
 * is merged with a second merge of the RAM cache and the BLOB array (both without rotation).
 * NOTE(review): the RAM cache therefore contributes to both merge levels; whether counts
 * are combined correctly depends on ReferenceContainer.containerMergeMethod - confirm.
 *
 * @param starttermHash the term hash where the iteration starts
 * @param rot rotation flag handed to the outer RAM cache iterator
 * @return the merged count iterator
 */
public CloneableIterator<Rating<byte[]>> referenceCountIterator(final byte[] starttermHash, final boolean rot) {
    // ordering of the ratings, rotated so that it begins at the start hash
    final RatingOrder<byte[]> containerOrder = new RatingOrder<byte[]>(this.ram.rowdef().getOrdering());
    containerOrder.rotate(new Rating<byte[]>(starttermHash, 0));

    // the sources are created in the same sequence as the nested constructor arguments were evaluated
    final CloneableIterator<Rating<byte[]>> rotatingRamCounts = this.ram.referenceCountIterator(starttermHash, rot);
    final CloneableIterator<Rating<byte[]>> plainRamCounts = this.ram.referenceCountIterator(starttermHash, false);
    final CloneableIterator<Rating<byte[]>> arrayCounts = this.array.referenceCountIterator(starttermHash, false);

    final MergeIterator<Rating<byte[]>> ramAndArray = new MergeIterator<Rating<byte[]>>(
            plainRamCounts,
            arrayCounts,
            containerOrder,
            ReferenceContainer.containerMergeMethod,
            true);
    return new MergeIterator<Rating<byte[]>>(
            rotatingRamCounts,
            ramAndArray,
            containerOrder,
            ReferenceContainer.containerMergeMethod,
            true);
}
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] starttermHash, final boolean rot) {
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, this.ram.rowdef().getOrdering().clone());
containerOrder.rotate(new ReferenceContainer<ReferenceType>(this.factory, starttermHash));
return new MergeIterator<ReferenceContainer<ReferenceType>>(
this.ram.references(starttermHash, rot),
this.ram.referenceContainerIterator(starttermHash, rot),
new MergeIterator<ReferenceContainer<ReferenceType>>(
this.ram.references(starttermHash, false),
this.ram.referenceContainerIterator(starttermHash, false),
this.array.referenceContainerIterator(starttermHash, false),
containerOrder,
ReferenceContainer.containerMergeMethod,
@ -466,14 +484,14 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
true);
}
public CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startTermHash, final boolean rot, final boolean ram) {
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] startTermHash, final boolean rot, final boolean ram) {
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, this.ram.rowdef().getOrdering().clone());
containerOrder.rotate(new ReferenceContainer<ReferenceType>(this.factory, startTermHash));
if (ram) {
return this.ram.references(startTermHash, rot);
return this.ram.referenceContainerIterator(startTermHash, rot);
}
return new MergeIterator<ReferenceContainer<ReferenceType>>(
this.ram.references(startTermHash, false),
this.ram.referenceContainerIterator(startTermHash, false),
this.array.referenceContainerIterator(startTermHash, false),
containerOrder,
ReferenceContainer.containerMergeMethod,

View File

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -34,8 +34,8 @@ public interface IndexReader<ReferenceType extends Reference> {
public int size();
public boolean has(byte[] wordHash); // should only be used if in case that true is returned the getContainer is NOT called
public ReferenceContainer<ReferenceType> get(byte[] wordHash, HandleSet urlselection);
public CloneableIterator<ReferenceContainer<ReferenceType>> references(byte[] startWordHash, boolean rot);
public ReferenceContainer<ReferenceType> get(byte[] wordHash, HandleSet urlselection);
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(byte[] startWordHash, boolean rot);
public void close();
}

View File

@ -7,7 +7,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -29,6 +29,7 @@ import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import net.yacy.cora.ranking.Rating;
import net.yacy.kelondro.blob.ArrayStack;
import net.yacy.kelondro.blob.BLOB;
import net.yacy.kelondro.index.HandleMap;
@ -45,7 +46,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
protected final ReferenceFactory<ReferenceType> factory;
protected final ArrayStack array;
private final IODispatcher merger;
/**
* open a index container array based on BLOB dumps. The content of the BLOBs will not be read
* unless a .idx file exists. Only the .idx file is opened to get a fast read access to
@ -54,7 +55,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
* is still possible
* @param payloadrow the row definition for the BLOB data structure
* @param log
* @throws IOException
* @throws IOException
*/
public ReferenceContainerArray(
final File heapLocation,
@ -62,7 +63,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
final ReferenceFactory<ReferenceType> factory,
final ByteOrder termOrder,
final int termSize,
IODispatcher merger) throws IOException {
final IODispatcher merger) throws IOException {
this.factory = factory;
this.array = new ArrayStack(
heapLocation,
@ -74,49 +75,49 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
assert merger != null;
this.merger = merger;
}
/**
 * Closes the backing BLOB array stack by delegating to {@code array.close(true)}.
 */
public void close() {
this.array.close(true);
}
/**
 * Clears the backing BLOB array stack by delegating to {@code array.clear()}.
 * @throws IOException if the array stack reports an I/O failure
 */
public void clear() throws IOException {
this.array.clear();
}
public long mem() {
return array.mem();
return this.array.mem();
}
/**
 * Reports the sizes delivered by the backing array stack.
 * @return the result of {@code array.sizes()}, or an empty int array if no array stack is set
 */
public int[] sizes() {
    if (this.array == null) {
        return new int[0];
    }
    return this.array.sizes();
}
/**
 * @return the byte order reported by the backing array stack
 */
public ByteOrder ordering() {
return this.array.ordering();
}
/**
 * Asks the backing array stack for a new BLOB file, passing the current date.
 * NOTE(review): presumably the date becomes part of the file name - confirm in ArrayStack.newBLOB.
 * @return the file returned by {@code array.newBLOB}
 */
public File newContainerBLOBFile() {
return this.array.newBLOB(new Date());
}
public void mountBLOBFile(File location) throws IOException {
public void mountBLOBFile(final File location) throws IOException {
this.array.mountBLOB(location, false);
}
/**
 * @return the row definition supplied by the reference factory of this array
 */
public Row rowdef() {
return this.factory.getRow();
}
/**
* return an iterator object that creates top-level-clones of the indexContainers
* in the cache, so that manipulations of the iterated objects do not change
* objects in the cache.
* @throws IOException
* @throws IOException
*/
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] startWordHash, final boolean rot) {
try {
return new ReferenceContainerIterator(startWordHash, rot);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
return null;
}
@ -128,59 +129,132 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
// and because every indexContainer Object that is iterated must be returned as top-level-clone
// so this class simulates wCache.tailMap(startWordHash).values().iterator()
// plus the mentioned features
private final boolean rot;
protected CloneableIterator<byte[]> iterator;
public ReferenceContainerIterator(final byte[] startWordHash, final boolean rot) throws IOException {
this.rot = rot;
this.iterator = array.keys(true, startWordHash);
this.iterator = ReferenceContainerArray.this.array.keys(true, startWordHash);
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
}
public ReferenceContainerIterator clone(final Object secondWordHash) {
try {
return new ReferenceContainerIterator((byte[]) secondWordHash, rot);
} catch (IOException e) {
return new ReferenceContainerIterator((byte[]) secondWordHash, this.rot);
} catch (final IOException e) {
Log.logException(e);
return null;
}
}
public boolean hasNext() {
if (this.iterator == null) return false;
if (rot) return true;
return iterator.hasNext();
if (this.rot) return true;
return this.iterator.hasNext();
}
public ReferenceContainer<ReferenceType> next() {
if (iterator.hasNext()) try {
return get(iterator.next());
} catch (Exception e) {
if (this.iterator.hasNext()) try {
return get(this.iterator.next());
} catch (final Exception e) {
Log.logException(e);
return null;
}
// rotation iteration
if (!rot) {
if (!this.rot) {
return null;
}
try {
iterator = array.keys(true, null);
return get(iterator.next());
} catch (Exception e) {
this.iterator = ReferenceContainerArray.this.array.keys(true, null);
return get(this.iterator.next());
} catch (final Exception e) {
Log.logException(e);
return null;
}
}
public void remove() {
iterator.remove();
this.iterator.remove();
}
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
return this;
}
}
/**
 * return an iterator object that counts the number of references in indexContainers
 * the startWordHash may be null to iterate all from the beginning
 * An IOException raised while the iterator is created is not propagated:
 * it is logged and null is returned instead, so callers must expect a null result.
 * @param startWordHash the term hash to start with, or null to start at the beginning
 * @param rot rotation flag that is handed to the created ReferenceCountIterator
 * @return the count iterator, or null if its creation failed with an IOException
 */
public CloneableIterator<Rating<byte[]>> referenceCountIterator(final byte[] startWordHash, final boolean rot) {
try {
return new ReferenceCountIterator(startWordHash, rot);
} catch (final IOException e) {
// the failure is only logged; the caller sees it as a null iterator
Log.logException(e);
return null;
}
}
/**
 * Iterator over the keys of the BLOB array that delivers, for every term hash,
 * a {@link Rating} made of the term hash and the value of
 * {@link ReferenceContainerArray#count(byte[])} for that hash.
 * With rotation enabled the iteration restarts at the first key once the key
 * iterator is exhausted.
 */
public class ReferenceCountIterator implements CloneableIterator<Rating<byte[]>>, Iterable<Rating<byte[]>> {

    /** if true, the iteration wraps around to the beginning instead of ending */
    private final boolean rot;
    /** key iterator of the backing array; exchanged for a fresh one on wrap-around */
    protected CloneableIterator<byte[]> iterator;

    /**
     * @param startWordHash the key to start with, passed on to {@code array.keys}
     * @param rot if true, iterate with rotation
     * @throws IOException if the key iterator cannot be created
     */
    public ReferenceCountIterator(final byte[] startWordHash, final boolean rot) throws IOException {
        this.rot = rot;
        // the keys are requested in ascending direction, beginning at the start hash
        this.iterator = ReferenceContainerArray.this.array.keys(true, startWordHash);
    }

    /**
     * Creates a new iterator with the same rotation flag that starts at the given hash.
     * @param secondWordHash the new start hash, must be a byte[]
     * @return the new iterator, or null if its creation failed with an IOException
     */
    public ReferenceCountIterator clone(final Object secondWordHash) {
        final byte[] restartHash = (byte[]) secondWordHash;
        try {
            return new ReferenceCountIterator(restartHash, this.rot);
        } catch (final IOException e) {
            Log.logException(e);
            return null;
        }
    }

    /**
     * @return false without a key iterator, always true in rotation mode,
     *         otherwise whatever the key iterator reports
     */
    public boolean hasNext() {
        return this.iterator != null && (this.rot || this.iterator.hasNext());
    }

    /**
     * @return the rating of the next term hash; null if the iteration ended without
     *         rotation or if fetching or counting the next key raised an exception
     */
    public Rating<byte[]> next() {
        final boolean keysLeft = this.iterator.hasNext();
        if (!keysLeft && !this.rot) {
            return null;
        }
        try {
            if (!keysLeft) {
                // rotation: start over with the first key
                this.iterator = ReferenceContainerArray.this.array.keys(true, null);
            }
            final byte[] termHash = this.iterator.next();
            return new Rating<byte[]>(termHash, count(termHash));
        } catch (final Exception e) {
            Log.logException(e);
            return null;
        }
    }

    /** removal is delegated to the key iterator */
    public void remove() {
        this.iterator.remove();
    }

    public Iterator<Rating<byte[]>> iterator() {
        return this;
    }
}
/**
@ -188,24 +262,24 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
* this works with heaps in write- and read-mode
* @param key
* @return true, if the key is used in the heap; false otherwise
* @throws IOException
* @throws IOException
*/
public boolean has(final byte[] termHash) {
return this.array.containsKey(termHash);
}
/**
* get a indexContainer from a heap
* @param key
* @return the indexContainer if one exist, null otherwise
* @throws IOException
* @throws RowSpaceExceededException
* @throws IOException
* @throws RowSpaceExceededException
*/
public ReferenceContainer<ReferenceType> get(final byte[] termHash) throws IOException, RowSpaceExceededException {
long timeout = System.currentTimeMillis() + 3000;
Iterator<byte[]> entries = this.array.getAll(termHash).iterator();
final long timeout = System.currentTimeMillis() + 3000;
final Iterator<byte[]> entries = this.array.getAll(termHash).iterator();
if (entries == null || !entries.hasNext()) return null;
byte[] a = entries.next();
final byte[] a = entries.next();
int k = 1;
ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(a, this.factory.getRow()));
if (System.currentTimeMillis() > timeout) {
@ -222,12 +296,12 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
}
return c;
}
public int count(final byte[] termHash) throws IOException {
long timeout = System.currentTimeMillis() + 3000;
Iterator<Long> entries = this.array.lengthAll(termHash).iterator();
final long timeout = System.currentTimeMillis() + 3000;
final Iterator<Long> entries = this.array.lengthAll(termHash).iterator();
if (entries == null || !entries.hasNext()) return 0;
Long a = entries.next();
final Long a = entries.next();
int k = 1;
int c = RowSet.importRowCount(a, this.factory.getRow());
assert c >= 0;
@ -247,7 +321,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
assert c >= 0;
return c;
}
/**
* calculate an upper limit for a ranking number of the container size
* the returned number is not a counter. It can only be used to compare the
@ -259,110 +333,110 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
public long lenghtRankingUpperLimit(final byte[] termHash) throws IOException {
return this.array.lengthAdd(termHash);
}
/**
* delete a indexContainer from the heap cache. This can only be used for write-enabled heaps
* @param wordHash
* @return the indexContainer if the cache contained the container, null otherwise
* @throws IOException
* @throws IOException
*/
public void delete(final byte[] termHash) throws IOException {
// returns the index that had been deleted
array.delete(termHash);
this.array.delete(termHash);
}
public int reduce(final byte[] termHash, ContainerReducer<ReferenceType> reducer) throws IOException, RowSpaceExceededException {
return array.reduce(termHash, new BLOBReducer(termHash, reducer));
public int reduce(final byte[] termHash, final ContainerReducer<ReferenceType> reducer) throws IOException, RowSpaceExceededException {
return this.array.reduce(termHash, new BLOBReducer(termHash, reducer));
}
public class BLOBReducer implements BLOB.Reducer {
ContainerReducer<ReferenceType> rewriter;
byte[] wordHash;
public BLOBReducer(byte[] wordHash, ContainerReducer<ReferenceType> rewriter) {
public BLOBReducer(final byte[] wordHash, final ContainerReducer<ReferenceType> rewriter) {
this.rewriter = rewriter;
this.wordHash = wordHash;
}
public byte[] rewrite(byte[] b) throws RowSpaceExceededException {
public byte[] rewrite(final byte[] b) throws RowSpaceExceededException {
if (b == null) return null;
ReferenceContainer<ReferenceType> c = rewriter.reduce(new ReferenceContainer<ReferenceType>(factory, this.wordHash, RowSet.importRowSet(b, factory.getRow())));
final ReferenceContainer<ReferenceType> c = this.rewriter.reduce(new ReferenceContainer<ReferenceType>(ReferenceContainerArray.this.factory, this.wordHash, RowSet.importRowSet(b, ReferenceContainerArray.this.factory.getRow())));
if (c == null) return null;
byte bb[] = c.exportCollection();
final byte bb[] = c.exportCollection();
assert bb.length <= b.length;
return bb;
}
}
/**
 * Callback that rewrites a reference container; it is applied by BLOBReducer.rewrite
 * to every container that is read from a BLOB during reduce().
 * BLOBReducer treats a null result as "nothing to write back" and asserts that the
 * exported result is not longer than the input.
 */
public interface ContainerReducer<ReferenceType extends Reference> {
public ReferenceContainer<ReferenceType> reduce(ReferenceContainer<ReferenceType> container);
}
/**
 * @return the entry count reported by the backing array stack
 *         (NOTE(review): presumably the number of mounted BLOB files - confirm in ArrayStack.entries)
 */
public int entries() {
return this.array.entries();
}
public boolean shrink(long targetFileSize, long maxFileSize) {
public boolean shrink(final long targetFileSize, final long maxFileSize) {
if (this.array.entries() < 2) return false;
boolean donesomething = false;
// first try to merge small files that match
while (this.merger.queueLength() < 3 || this.array.entries() >= 50) {
File[] ff = this.array.unmountBestMatch(2.0f, targetFileSize);
final File[] ff = this.array.unmountBestMatch(2.0f, targetFileSize);
if (ff == null) break;
Log.logInfo("RICELL-shrink1", "unmountBestMatch(2.0, " + targetFileSize + ")");
merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
donesomething = true;
}
// then try to merge simply any small file
while (this.merger.queueLength() < 2) {
File[] ff = this.array.unmountSmallest(targetFileSize);
final File[] ff = this.array.unmountSmallest(targetFileSize);
if (ff == null) break;
Log.logInfo("RICELL-shrink2", "unmountSmallest(" + targetFileSize + ")");
merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
donesomething = true;
}
// if there is no small file, then merge matching files up to limit
while (this.merger.queueLength() < 1) {
File[] ff = this.array.unmountBestMatch(2.0f, maxFileSize);
final File[] ff = this.array.unmountBestMatch(2.0f, maxFileSize);
if (ff == null) break;
Log.logInfo("RICELL-shrink3", "unmountBestMatch(2.0, " + maxFileSize + ")");
merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
donesomething = true;
}
// rewrite old files (hack from sixcooler, see http://forum.yacy-websuche.de/viewtopic.php?p=15004#p15004)
while (this.merger.queueLength() < 1) {
File ff = this.array.unmountOldest();
final File ff = this.array.unmountOldest();
if (ff == null) break;
Log.logInfo("RICELL-shrink4/rewrite", "unmountOldest()");
merger.merge(ff, null, this.factory, this.array, newContainerBLOBFile());
this.merger.merge(ff, null, this.factory, this.array, newContainerBLOBFile());
donesomething = true;
}
return donesomething;
}
public static <ReferenceType extends Reference> HandleMap referenceHashes(
final File heapLocation,
final ReferenceFactory<ReferenceType> factory,
final ByteOrder termOrder,
final Row payloadrow) throws IOException, RowSpaceExceededException {
System.out.println("CELL REFERENCE COLLECTION startup");
HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath());
String[] files = heapLocation.list();
for (String f: files) {
final HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath());
final String[] files = heapLocation.list();
for (final String f: files) {
if (f.length() < 22 || !f.startsWith("text.index") || !f.endsWith(".blob")) continue;
File fl = new File(heapLocation, f);
final File fl = new File(heapLocation, f);
System.out.println("CELL REFERENCE COLLECTION opening blob " + fl);
CloneableIterator<ReferenceContainer<ReferenceType>> ei = new ReferenceIterator<ReferenceType>(fl, factory);
final CloneableIterator<ReferenceContainer<ReferenceType>> ei = new ReferenceIterator<ReferenceType>(fl, factory);
ReferenceContainer<ReferenceType> container;
final long start = System.currentTimeMillis();
long lastlog = start - 27000;
@ -372,7 +446,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
while (ei.hasNext()) {
container = ei.next();
if (container == null) continue;
Iterator<ReferenceType> refi = container.entries();
final Iterator<ReferenceType> refi = container.entries();
while (refi.hasNext()) {
reference = refi.next();
if (reference == null) continue;
@ -392,5 +466,5 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
return references;
}
}

View File

@ -36,6 +36,7 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.ranking.Rating;
import net.yacy.kelondro.blob.HeapWriter;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.Row;
@ -177,6 +178,17 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
return cachecopy;
}
/**
 * Takes a snapshot of the cache as a list of ratings: one rating per cached
 * container, consisting of the cache key and the size of the container.
 * Entries without a container or without a term hash are left out.
 * The cache is locked while it is scanned; the list is sorted afterwards
 * with a {@code Rating.ObjectComparator}.
 * @return the sorted list of (key, container size) ratings
 */
protected List<Rating<ByteArray>> ratingList() {
    final List<Rating<ByteArray>> ratings = new ArrayList<Rating<ByteArray>>(this.cache.size());
    synchronized (this.cache) {
        for (final Map.Entry<ByteArray, ReferenceContainer<ReferenceType>> cached: this.cache.entrySet()) {
            final ReferenceContainer<ReferenceType> container = cached.getValue();
            if (container == null || container.getTermHash() == null) {
                continue;
            }
            ratings.add(new Rating<ByteArray>(cached.getKey(), container.size()));
        }
    }
    Collections.sort(ratings, new Rating.ObjectComparator<ByteArray>());
    return ratings;
}
/**
 * @return the number of entries in the cache, or 0 if there is no cache
 */
public int size() {
    if (this.cache == null) {
        return 0;
    }
    return this.cache.size();
}
@ -195,26 +207,24 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
return max;
}
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
return referenceContainerIterator(null, false);
}
/**
* return an iterator object that creates top-level-clones of the indexContainers
* in the cache, so that manipulations of the iterated objects do not change
* objects in the cache.
*/
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startWordHash, final boolean rot) {
return new heapCacheIterator(startWordHash, rot);
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] startWordHash, final boolean rot) {
return new ReferenceContainerIterator(startWordHash, rot);
}
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
return references(null, false);
}
/**
* cache iterator: iterates objects within the heap cache. This can only be used
* for write-enabled heaps, read-only heaps do not have a heap cache
*/
public class heapCacheIterator implements CloneableIterator<ReferenceContainer<ReferenceType>>, Iterable<ReferenceContainer<ReferenceType>> {
public class ReferenceContainerIterator implements CloneableIterator<ReferenceContainer<ReferenceType>>, Iterable<ReferenceContainer<ReferenceType>> {
// this class exists, because the wCache cannot be iterated with rotation
// and because every indexContainer Object that is iterated must be returned as top-level-clone
@ -226,7 +236,7 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
private int p;
private byte[] latestTermHash;
public heapCacheIterator(byte[] startWordHash, final boolean rot) {
public ReferenceContainerIterator(byte[] startWordHash, final boolean rot) {
this.rot = rot;
if (startWordHash != null && startWordHash.length == 0) startWordHash = null;
this.cachecopy = sortedClone();
@ -242,8 +252,8 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
}
public heapCacheIterator clone(final Object secondWordHash) {
return new heapCacheIterator((byte[]) secondWordHash, this.rot);
public ReferenceContainerIterator clone(final Object secondWordHash) {
return new ReferenceContainerIterator((byte[]) secondWordHash, this.rot);
}
public boolean hasNext() {
@ -289,6 +299,75 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
}
/**
 * Creates a count iterator over this cache.
 * @param startHash the term hash to start with, passed to the ReferenceCountIterator constructor
 * @param rot rotation flag, passed to the ReferenceCountIterator constructor
 * @return a new ReferenceCountIterator
 */
@Override
public CloneableIterator<Rating<byte[]>> referenceCountIterator(final byte[] startHash, final boolean rot) {
return new ReferenceCountIterator(startHash, rot);
}
/**
 * cache count iterator: iterates the term hashes within the heap cache and delivers for
 * each of them a {@link Rating} of the term hash and the number of references that its
 * container held when the iterator was created (the counts come from a snapshot made by
 * {@code ratingList()}, later changes of the cache are not reflected).
 * This can only be used for write-enabled heaps, read-only heaps do not have a heap cache
 * NOTE(review): the start position is searched with termOrder while the snapshot is sorted
 * with Rating.ObjectComparator; this assumes both orderings agree - confirm.
 */
public class ReferenceCountIterator implements CloneableIterator<Rating<byte[]>>, Iterable<Rating<byte[]>> {

    /** if true, the iteration wraps around to the first element instead of ending */
    private final boolean rot;
    /** sorted snapshot of (cache key, container size) taken at construction time */
    private final List<Rating<ByteArray>> cachecounts;
    /** index into cachecounts of the element that next() delivers next */
    private int p;
    /** term hash of the element delivered by the latest next(); null if there is none to remove */
    private byte[] latestTermHash;

    /**
     * @param startWordHash the term hash to start with; null or an empty array starts at the beginning
     * @param rot if true, iterate with rotation
     */
    public ReferenceCountIterator(byte[] startWordHash, final boolean rot) {
        this.rot = rot;
        if (startWordHash != null && startWordHash.length == 0) startWordHash = null;
        this.cachecounts = ratingList();
        assert this.cachecounts != null;
        assert ReferenceContainerCache.this.termOrder != null;
        this.p = 0;
        if (startWordHash != null) {
            // skip all snapshot entries that are ordered before the start hash
            while (this.p < this.cachecounts.size() &&
                   ReferenceContainerCache.this.termOrder.compare(this.cachecounts.get(this.p).getObject().asBytes(), startWordHash) < 0) {
                this.p++;
            }
        }
        this.latestTermHash = null;
    }

    /**
     * @param secondWordHash the start hash of the new iterator, must be a byte[]
     * @return a new iterator with the same rotation flag and a fresh snapshot
     */
    public ReferenceCountIterator clone(final Object secondWordHash) {
        return new ReferenceCountIterator((byte[]) secondWordHash, this.rot);
    }

    /**
     * @return in rotation mode true as long as the snapshot is not empty,
     *         otherwise true while the end of the snapshot is not reached
     */
    public boolean hasNext() {
        if (this.rot) return this.cachecounts.size() > 0;
        return this.p < this.cachecounts.size();
    }

    /**
     * @return the rating of the next term hash, or null if the iteration has ended
     *         (no rotation) or the snapshot is empty
     */
    public Rating<byte[]> next() {
        if (this.p < this.cachecounts.size()) {
            final Rating<ByteArray> c = this.cachecounts.get(this.p++);
            this.latestTermHash = c.getObject().asBytes();
            return new Rating<byte[]>(c.getObject().asBytes(), c.getScore());
        }
        // rotation iteration
        if (!this.rot) {
            return null;
        }
        if (this.cachecounts.isEmpty()) return null;
        this.p = 0;
        final Rating<ByteArray> c = this.cachecounts.get(this.p++);
        this.latestTermHash = c.getObject().asBytes();
        return new Rating<byte[]>(c.getObject().asBytes(), c.getScore());
    }

    /**
     * Removes the element delivered by the latest next() from the snapshot and from the cache.
     * The snapshot is a List, so the element is dropped with List.remove(int); the former
     * System.arraycopy call on the list always failed with an ArrayStoreException.
     * @throws IllegalStateException if next() was not called since creation or since the last remove()
     */
    public void remove() {
        if (this.latestTermHash == null) {
            throw new IllegalStateException("remove() requires a preceding successful call to next()");
        }
        // next() has already advanced p behind the delivered element; step back so that
        // the element following the removed one is delivered next and not skipped
        this.cachecounts.remove(--this.p);
        ReferenceContainerCache.this.cache.remove(new ByteArray(this.latestTermHash));
        this.latestTermHash = null;
    }

    public Iterator<Rating<byte[]>> iterator() {
        return this;
    }
}
/**
* test if a given key is in the heap
* this works with heaps in write- and read-mode

View File

@ -26,8 +26,8 @@
package net.yacy.kelondro.rwi;
import net.yacy.kelondro.order.AbstractOrder;
import net.yacy.kelondro.order.Order;
import net.yacy.cora.ranking.AbstractOrder;
import net.yacy.cora.ranking.Order;
public class ReferenceContainerOrder<ReferenceType extends Reference> extends AbstractOrder<ReferenceContainer<ReferenceType>> implements Order<ReferenceContainer<ReferenceType>>, Cloneable {

View File

@ -44,6 +44,7 @@ import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.blob.ArrayStack;
import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.HandleSet;
@ -55,7 +56,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.order.MergeIterator;
import net.yacy.kelondro.order.Order;
import net.yacy.kelondro.order.StackIterator;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.NamePrefixThreadFactory;

View File

@ -209,7 +209,7 @@ public final class yacy {
}
sb = new Switchboard(dataHome, appHome, "defaults/yacy.init".replace("/", File.separator), newconf);
//sbSync.V(); // signal that the sb reference was set
// switch the memory strategy
MemoryControl.setStandardStrategy(sb.getConfigBool("memory.standardStrategy", true));
@ -648,7 +648,7 @@ public final class yacy {
new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
10000,
(long) Integer.MAX_VALUE, false, false);
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().references("AAAAAAAAAAAA".getBytes(), false, false);
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().referenceContainerIterator("AAAAAAAAAAAA".getBytes(), false, false);
long urlCounter = 0, wordCounter = 0;
long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0;
@ -828,7 +828,7 @@ public final class yacy {
new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
10000,
(long) Integer.MAX_VALUE, false, false);
indexContainerIterator = WordIndex.termIndex().references(wordChunkStartHash.getBytes(), false, false);
indexContainerIterator = WordIndex.termIndex().referenceContainerIterator(wordChunkStartHash.getBytes(), false, false);
}
int counter = 0;
ReferenceContainer<WordReference> container = null;