mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added new methods to count the number of objects in RWIs. lots of refactoring was necessary to introduce new Rating class and to unify naming of methods
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7896 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
75df87832c
commit
2c595a6a47
|
@ -305,7 +305,7 @@ public class IndexControlRWIs_p {
|
|||
|
||||
// generate list
|
||||
if (post.containsKey("keyhashsimilar")) try {
|
||||
final Iterator<ReferenceContainer<WordReference>> containerIt = segment.termIndex().references(keyhash, true, 256, false).iterator();
|
||||
final Iterator<ReferenceContainer<WordReference>> containerIt = segment.termIndex().referenceContainer(keyhash, true, 256, false).iterator();
|
||||
ReferenceContainer<WordReference> container;
|
||||
i = 0;
|
||||
int rows = 0, cols = 0;
|
||||
|
|
|
@ -470,7 +470,7 @@ public class Segment {
|
|||
DigestURI url = null;
|
||||
final HandleSet urlHashs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
|
||||
try {
|
||||
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.references(this.startHash, false, 100, false).iterator();
|
||||
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.referenceContainer(this.startHash, false, 100, false).iterator();
|
||||
while (indexContainerIterator.hasNext() && this.run) {
|
||||
waiter();
|
||||
container = indexContainerIterator.next();
|
||||
|
@ -503,7 +503,7 @@ public class Segment {
|
|||
|
||||
if (!containerIterator.hasNext()) {
|
||||
// We may not be finished yet, try to get the next chunk of wordHashes
|
||||
final TreeSet<ReferenceContainer<WordReference>> containers = Segment.this.termIndex.references(container.getTermHash(), false, 100, false);
|
||||
final TreeSet<ReferenceContainer<WordReference>> containers = Segment.this.termIndex.referenceContainer(container.getTermHash(), false, 100, false);
|
||||
indexContainerIterator = containers.iterator();
|
||||
// Make sure we don't get the same wordhash twice, but don't skip a word
|
||||
if ((indexContainerIterator.hasNext()) && (!container.getTermHash().equals(indexContainerIterator.next().getTermHash()))) {
|
||||
|
|
|
@ -168,7 +168,7 @@ public class Dispatcher {
|
|||
|
||||
final ArrayList<ReferenceContainer<WordReference>> containers = new ArrayList<ReferenceContainer<WordReference>>(maxContainerCount);
|
||||
|
||||
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = this.segment.termIndex().references(hash, true, ram);
|
||||
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = this.segment.termIndex().referenceContainerIterator(hash, true, ram);
|
||||
ReferenceContainer<WordReference> container;
|
||||
int refcount = 0;
|
||||
|
||||
|
|
77
source/net/yacy/cora/ranking/AbstractOrder.java
Normal file
77
source/net/yacy/cora/ranking/AbstractOrder.java
Normal file
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* AbstractOrder
|
||||
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||
* First released 25.08.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate$
|
||||
* $LastChangedRevision$
|
||||
* $LastChangedBy$
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.ranking;
|
||||
|
||||
public abstract class AbstractOrder<A> implements Order<A> {
|
||||
|
||||
protected A zero = null;
|
||||
protected boolean asc = true;
|
||||
|
||||
@Override
|
||||
abstract public Order<A> clone();
|
||||
|
||||
public A zero() {
|
||||
return this.zero;
|
||||
}
|
||||
|
||||
public void direction(final boolean ascending) {
|
||||
this.asc = ascending;
|
||||
}
|
||||
|
||||
public long partition(final A key, final int forks) {
|
||||
final long d = (Long.MAX_VALUE / forks) + ((Long.MAX_VALUE % forks) + 1) / forks;
|
||||
return cardinal(key) / d;
|
||||
}
|
||||
|
||||
public void rotate(final A newzero) {
|
||||
this.zero = newzero;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) return true;
|
||||
if (obj == null) return false;
|
||||
if (!(obj instanceof Order<?>)) return false;
|
||||
final Order<A> other = (Order<A>) obj;
|
||||
final String thisSig = signature();
|
||||
final String otherSig = other.signature();
|
||||
if ((thisSig == null) || (otherSig == null)) return false;
|
||||
return thisSig.equals(otherSig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return signature().hashCode();
|
||||
}
|
||||
|
||||
public A smallest(final A a, final A b) {
|
||||
return (compare(a, b) > 0) ? b : a;
|
||||
}
|
||||
|
||||
public A largest(final A a, final A b) {
|
||||
return (compare(a, b) > 0) ? a : b;
|
||||
}
|
||||
}
|
85
source/net/yacy/cora/ranking/Order.java
Normal file
85
source/net/yacy/cora/ranking/Order.java
Normal file
|
@ -0,0 +1,85 @@
|
|||
/**
|
||||
* Order
|
||||
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||
* First released 25.08.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate$
|
||||
* $LastChangedRevision$
|
||||
* $LastChangedBy$
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.ranking;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
 * A comparator with additional properties: it can validate its operands,
 * change its direction, rotate around a zero point, map keys to cardinal
 * numbers and identify itself with a signature.
 *
 * @param <A> the type of the objects that are ordered
 */
public interface Order<A> extends Comparator<A> {

    /**
     * check if an object is valid for this order
     * @param a the object to be checked
     * @return true if and only if a has only characters that belong to the implemented order
     */
    boolean wellformed(A a);

    Order<A> clone();

    /**
     * set the ordering direction; the direction can be changed at any time
     * @param ascending the new direction
     */
    void direction(boolean ascending);

    /**
     * @return a signature String so that different orderings have different signatures
     */
    String signature();

    long partition(A key, int forkes);

    /**
     * @param key the key to be mapped
     * @return a cardinal number in the range of 0 .. Long.MAX_VALUE
     */
    long cardinal(A key);

    int compare(A a, A b);

    boolean equal(A a, A b);

    /**
     * @return the zero point of the Ordering; null if not defined
     */
    A zero();

    /**
     * defines that the ordering rotates, and sets the zero point for the rotation
     * @param zero the zero point for the rotation
     */
    void rotate(A zero);

    /**
     * used to compare different order objects; they may define the same ordering
     */
    @Override
    boolean equals(Object o);

    @Override
    int hashCode();
}
|
82
source/net/yacy/cora/ranking/Rating.java
Normal file
82
source/net/yacy/cora/ranking/Rating.java
Normal file
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Rating
|
||||
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||
* First released 25.08.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate: 2011-03-08 02:51:51 +0100 (Di, 08 Mrz 2011) $
|
||||
* $LastChangedRevision: 7567 $
|
||||
* $LastChangedBy: low012 $
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.ranking;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class Rating<A> {
|
||||
|
||||
private final A object;
|
||||
private long score;
|
||||
|
||||
public Rating(final A o, final long score) {
|
||||
this.object = o;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
public void setScore(final long score) {
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
public long getScore() {
|
||||
return this.score;
|
||||
}
|
||||
|
||||
public A getObject() {
|
||||
return this.object;
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
public final static ObjectComparator<?> objectComparator = new ObjectComparator();
|
||||
public final static ScoreComparator scoreComparator = new ScoreComparator();
|
||||
|
||||
public static class ObjectComparator<B> implements Comparator<Rating<B>> {
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public int compare(final Rating<B> arg0, final Rating<B> arg1) {
|
||||
if (!(arg0 instanceof Comparable<?>)) throw new UnsupportedOperationException("object class must implement comparable");
|
||||
return ((Comparable<B>) arg0.getObject()).compareTo(arg1.getObject());
|
||||
}
|
||||
}
|
||||
|
||||
public static class ScoreComparator implements Comparator<Rating<?>> {
|
||||
|
||||
public int compare(final Rating<?> arg0, final Rating<?> arg1) {
|
||||
if (arg0.getScore() < arg1.getScore()) return -1;
|
||||
if (arg0.getScore() > arg1.getScore()) return 1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
public static class FoldedScoreComparator<B extends Comparable<B>> implements Comparator<Rating<B>> {
|
||||
|
||||
public int compare(final Rating<B> arg0, final Rating<B> arg1) {
|
||||
final int c = scoreComparator.compare(arg0, arg1);
|
||||
if (c != 0) return c;
|
||||
return arg0.getObject().compareTo(arg1.getObject());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
66
source/net/yacy/cora/ranking/RatingOrder.java
Normal file
66
source/net/yacy/cora/ranking/RatingOrder.java
Normal file
|
@ -0,0 +1,66 @@
|
|||
// RatingOrder.java
|
||||
// -----------------------
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://yacy.net
|
||||
// Frankfurt, Germany, 2011
|
||||
// created 25.08.2011
|
||||
//
|
||||
// $LastChangedDate: 2011-03-08 02:51:51 +0100 (Di, 08 Mrz 2011) $
|
||||
// $LastChangedRevision: 7567 $
|
||||
// $LastChangedBy: low012 $
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
package net.yacy.cora.ranking;
|
||||
|
||||
|
||||
public class RatingOrder<A> extends AbstractOrder<Rating<A>> implements Order<Rating<A>> {
|
||||
|
||||
Order<A> ordering;
|
||||
|
||||
public RatingOrder(final Order<A> ordering) {
|
||||
this.ordering = ordering;
|
||||
}
|
||||
|
||||
public int compare(final Rating<A> a, final Rating<A> b) {
|
||||
return this.ordering.compare(a.getObject(), b.getObject());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean wellformed(final Rating<A> a) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String signature() {
|
||||
return "RA";
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cardinal(final Rating<A> key) {
|
||||
return key.getScore();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equal(final Rating<A> a, final Rating<A> b) {
|
||||
return this.ordering.compare(a.getObject(), b.getObject()) == 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Order<Rating<A>> clone() {
|
||||
return this;
|
||||
}
|
||||
}
|
|
@ -37,13 +37,13 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.ranking.AbstractOrder;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.order.AbstractOrder;
|
||||
import net.yacy.kelondro.order.Base64Order;
|
||||
import net.yacy.kelondro.order.Bitfield;
|
||||
import net.yacy.kelondro.order.ByteOrder;
|
||||
import net.yacy.kelondro.order.NaturalOrder;
|
||||
import net.yacy.kelondro.order.Order;
|
||||
import net.yacy.kelondro.util.ByteBuffer;
|
||||
import net.yacy.kelondro.util.kelondroException;
|
||||
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
// AbstractOrder.java
|
||||
// -----------------------
|
||||
// part of The Kelondro Database
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2005
|
||||
// created 29.12.2005
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.kelondro.order;
|
||||
|
||||
public abstract class AbstractOrder<A> implements Order<A> {
|
||||
|
||||
protected A zero = null;
|
||||
protected boolean asc = true;
|
||||
|
||||
@Override
|
||||
abstract public Order<A> clone();
|
||||
|
||||
public A zero() {
|
||||
return zero;
|
||||
}
|
||||
|
||||
public void direction(final boolean ascending) {
|
||||
asc = ascending;
|
||||
}
|
||||
|
||||
public long partition(final A key, final int forks) {
|
||||
final long d = (Long.MAX_VALUE / forks) + ((Long.MAX_VALUE % forks) + 1) / forks;
|
||||
return cardinal(key) / d;
|
||||
}
|
||||
|
||||
public void rotate(final A newzero) {
|
||||
this.zero = newzero;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) return true;
|
||||
if (obj == null) return false;
|
||||
if (!(obj instanceof Order<?>)) return false;
|
||||
Order<A> other = (Order<A>) obj;
|
||||
final String thisSig = this.signature();
|
||||
final String otherSig = other.signature();
|
||||
if ((thisSig == null) || (otherSig == null)) return false;
|
||||
return thisSig.equals(otherSig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.signature().hashCode();
|
||||
}
|
||||
|
||||
public A smallest(A a, A b) {
|
||||
return (compare(a, b) > 0) ? b : a;
|
||||
}
|
||||
|
||||
public A largest(A a, A b) {
|
||||
return (compare(a, b) > 0) ? a : b;
|
||||
}
|
||||
}
|
|
@ -30,6 +30,8 @@ package net.yacy.kelondro.order;
|
|||
import java.util.Comparator;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.ranking.AbstractOrder;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
package net.yacy.kelondro.order;
|
||||
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Comparator;
|
|||
import java.util.ConcurrentModificationException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
|
||||
|
||||
|
|
|
@ -29,6 +29,8 @@ package net.yacy.kelondro.order;
|
|||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.yacy.cora.ranking.AbstractOrder;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
// Order.java
|
||||
// -----------------------
|
||||
// part of The Kelondro Database
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2005
|
||||
// created 29.12.2005
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.kelondro.order;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public interface Order<A> extends Comparator<A> {
|
||||
|
||||
public boolean wellformed(A a); // returns true if and only if a has only characters that belong to the implemented order
|
||||
|
||||
public Order<A> clone();
|
||||
|
||||
public void direction(boolean ascending); // the ordering direction can be changed at any time
|
||||
|
||||
public String signature(); // returns a signature String so that different orderings have different signatures
|
||||
|
||||
public long partition(A key, int forkes);
|
||||
|
||||
public long cardinal(A key); // returns a cardinal number in the range of 0 .. Long.MAX_VALUE
|
||||
|
||||
public int compare(A a, A b);
|
||||
|
||||
public boolean equal(A a, A b);
|
||||
|
||||
public A zero(); // returns the zero point of the Ordering; null if not defined
|
||||
|
||||
public void rotate(A zero); // defines that the ordering rotates, and sets the zero point for the rotation
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o); // used to compare different order objects; they may define the same ordering
|
||||
|
||||
@Override
|
||||
public int hashCode();
|
||||
}
|
|
@ -30,6 +30,7 @@ package net.yacy.kelondro.order;
|
|||
import java.util.Comparator;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
|
||||
public class StringOrder implements Comparator<String> {
|
||||
|
||||
|
|
|
@ -31,8 +31,7 @@ import java.io.IOException;
|
|||
import java.util.Iterator;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
import net.yacy.kelondro.order.Order;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
|
||||
|
||||
public abstract class AbstractBufferedIndex<ReferenceType extends Reference> extends AbstractIndex<ReferenceType> implements BufferedIndex<ReferenceType> {
|
||||
|
@ -41,15 +40,15 @@ public abstract class AbstractBufferedIndex<ReferenceType extends Reference> ext
|
|||
super(factory);
|
||||
}
|
||||
|
||||
public synchronized TreeSet<ReferenceContainer<ReferenceType>> references(byte[] startHash, final boolean rot, int count, boolean ram) throws IOException {
|
||||
public synchronized TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(byte[] startHash, final boolean rot, int count, final boolean ram) throws IOException {
|
||||
// creates a set of indexContainers
|
||||
// this does not use the cache
|
||||
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.termKeyOrdering().clone());
|
||||
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, termKeyOrdering().clone());
|
||||
if (startHash != null && startHash.length == 0) startHash = null;
|
||||
ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startHash);
|
||||
final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(this.factory, startHash);
|
||||
containerOrder.rotate(emptyContainer);
|
||||
final TreeSet<ReferenceContainer<ReferenceType>> containers = new TreeSet<ReferenceContainer<ReferenceType>>(containerOrder);
|
||||
final Iterator<ReferenceContainer<ReferenceType>> i = references(startHash, rot, ram);
|
||||
final Iterator<ReferenceContainer<ReferenceType>> i = referenceContainerIterator(startHash, rot, ram);
|
||||
if (ram) count = Math.min(size(), count);
|
||||
ReferenceContainer<ReferenceType> container;
|
||||
// this loop does not terminate using the i.hasNext() predicate when rot == true
|
||||
|
|
|
@ -32,11 +32,11 @@ import java.util.Iterator;
|
|||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.order.Base64Order;
|
||||
import net.yacy.kelondro.order.Order;
|
||||
|
||||
public abstract class AbstractIndex <ReferenceType extends Reference> implements Index<ReferenceType> {
|
||||
|
||||
|
@ -53,17 +53,17 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
|
|||
* @throws IOException
|
||||
* @throws RowSpaceExceededException
|
||||
*/
|
||||
public void merge(Index<ReferenceType> otherIndex) throws IOException, RowSpaceExceededException {
|
||||
public void merge(final Index<ReferenceType> otherIndex) throws IOException, RowSpaceExceededException {
|
||||
byte[] term;
|
||||
for (ReferenceContainer<ReferenceType> otherContainer: otherIndex) {
|
||||
for (final ReferenceContainer<ReferenceType> otherContainer: otherIndex) {
|
||||
term = otherContainer.getTermHash();
|
||||
synchronized (this) {
|
||||
ReferenceContainer<ReferenceType> container = this.get(term, null);
|
||||
final ReferenceContainer<ReferenceType> container = get(term, null);
|
||||
if (container == null) {
|
||||
this.add(otherContainer);
|
||||
} else {
|
||||
container.merge(otherContainer);
|
||||
this.delete(term); // in some file-based environments we cannot just change the container
|
||||
delete(term); // in some file-based environments we cannot just change the container
|
||||
this.add(container);
|
||||
}
|
||||
}
|
||||
|
@ -90,14 +90,14 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
|
|||
return c;
|
||||
}
|
||||
|
||||
public synchronized TreeSet<ReferenceContainer<ReferenceType>> references(final byte[] startHash, final boolean rot, int count) throws IOException {
|
||||
public synchronized TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(final byte[] startHash, final boolean rot, int count) throws IOException {
|
||||
// creates a set of indexContainers
|
||||
// this does not use the cache
|
||||
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.termKeyOrdering().clone());
|
||||
final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startHash);
|
||||
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, termKeyOrdering().clone());
|
||||
final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(this.factory, startHash);
|
||||
containerOrder.rotate(emptyContainer);
|
||||
final TreeSet<ReferenceContainer<ReferenceType>> containers = new TreeSet<ReferenceContainer<ReferenceType>>(containerOrder);
|
||||
final Iterator<ReferenceContainer<ReferenceType>> i = references(startHash, rot);
|
||||
final Iterator<ReferenceContainer<ReferenceType>> i = referenceContainerIterator(startHash, rot);
|
||||
//if (ram) count = Math.min(size(), count);
|
||||
ReferenceContainer<ReferenceType> container;
|
||||
// this loop does not terminate using the i.hasNext() predicate when rot == true
|
||||
|
@ -145,8 +145,8 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
|
|||
|
||||
// retrieve index
|
||||
try {
|
||||
singleContainer = this.get(singleHash, urlselection);
|
||||
} catch (IOException e) {
|
||||
singleContainer = get(singleHash, urlselection);
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
continue;
|
||||
}
|
||||
|
@ -173,26 +173,26 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
|
|||
public ReferenceContainer<ReferenceType> searchJoin(final HandleSet wordHashes, final HandleSet urlselection, final int maxDistance) throws RowSpaceExceededException {
|
||||
// first check if there is any entry that has no match;
|
||||
// this uses only operations in ram
|
||||
for (byte[] wordHash: wordHashes) {
|
||||
if (!this.has(wordHash)) return ReferenceContainer.emptyContainer(factory, null, 0);
|
||||
for (final byte[] wordHash: wordHashes) {
|
||||
if (!has(wordHash)) return ReferenceContainer.emptyContainer(this.factory, null, 0);
|
||||
}
|
||||
|
||||
// retrieve entities that belong to the hashes
|
||||
ReferenceContainer<ReferenceType> resultContainer = null;
|
||||
ReferenceContainer<ReferenceType> singleContainer;
|
||||
for (byte[] wordHash: wordHashes) {
|
||||
for (final byte[] wordHash: wordHashes) {
|
||||
// retrieve index
|
||||
try {
|
||||
singleContainer = this.get(wordHash, urlselection);
|
||||
} catch (IOException e) {
|
||||
singleContainer = get(wordHash, urlselection);
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// check result
|
||||
if ((singleContainer == null || singleContainer.isEmpty())) return ReferenceContainer.emptyContainer(factory, null, 0);
|
||||
if ((singleContainer == null || singleContainer.isEmpty())) return ReferenceContainer.emptyContainer(this.factory, null, 0);
|
||||
if (resultContainer == null) resultContainer = singleContainer; else {
|
||||
resultContainer = ReferenceContainer.joinConstructive(factory, resultContainer, singleContainer, maxDistance);
|
||||
resultContainer = ReferenceContainer.joinConstructive(this.factory, resultContainer, singleContainer, maxDistance);
|
||||
}
|
||||
|
||||
// finish if the result is empty
|
||||
|
|
|
@ -105,7 +105,7 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> references(
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(
|
||||
byte[] startHash,
|
||||
boolean rot,
|
||||
boolean buffer
|
||||
|
@ -124,7 +124,7 @@ public interface BufferedIndex<ReferenceType extends Reference> extends Index<Re
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public TreeSet<ReferenceContainer<ReferenceType>> references(
|
||||
public TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(
|
||||
byte[] startHash,
|
||||
boolean rot,
|
||||
int count,
|
||||
|
|
|
@ -32,6 +32,7 @@ import java.io.IOException;
|
|||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.yacy.cora.ranking.Rating;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
import net.yacy.kelondro.order.ByteOrder;
|
||||
|
@ -133,6 +134,7 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
|
|||
public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException;
|
||||
|
||||
public void removeDelayed() throws IOException;
|
||||
|
||||
/**
|
||||
* iterate all references from the beginning of a specific word hash
|
||||
* @param startHash
|
||||
|
@ -141,13 +143,26 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> references(
|
||||
byte[] startHash,
|
||||
boolean rot
|
||||
) throws IOException;
|
||||
public CloneableIterator<Rating<byte[]>> referenceCountIterator(
|
||||
byte[] startHash,
|
||||
boolean rot
|
||||
) throws IOException;
|
||||
|
||||
/**
|
||||
* iterate all references from the beginning of a specific word hash
|
||||
* @param startHash
|
||||
* @param rot if true, then rotate at the end to the beginning
|
||||
* @param ram
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(
|
||||
byte[] startHash,
|
||||
boolean rot
|
||||
) throws IOException;
|
||||
|
||||
|
||||
public TreeSet<ReferenceContainer<ReferenceType>> references(
|
||||
public TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(
|
||||
byte[] startHash,
|
||||
boolean rot,
|
||||
int count
|
||||
|
|
|
@ -32,6 +32,9 @@ import java.util.Iterator;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.cora.ranking.Rating;
|
||||
import net.yacy.cora.ranking.RatingOrder;
|
||||
import net.yacy.cora.storage.ComparableARC;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
|
@ -40,7 +43,6 @@ import net.yacy.kelondro.logging.Log;
|
|||
import net.yacy.kelondro.order.ByteOrder;
|
||||
import net.yacy.kelondro.order.CloneableIterator;
|
||||
import net.yacy.kelondro.order.MergeIterator;
|
||||
import net.yacy.kelondro.order.Order;
|
||||
import net.yacy.kelondro.util.EventTracker;
|
||||
import net.yacy.kelondro.util.MemoryControl;
|
||||
|
||||
|
@ -447,16 +449,32 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
|
|||
}
|
||||
|
||||
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
|
||||
return references(null, false);
|
||||
return referenceContainerIterator(null, false);
|
||||
}
|
||||
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] starttermHash, final boolean rot) {
|
||||
public CloneableIterator<Rating<byte[]>> referenceCountIterator(final byte[] starttermHash, final boolean rot) {
|
||||
final RatingOrder<byte[]> containerOrder = new RatingOrder<byte[]>(this.ram.rowdef().getOrdering());
|
||||
containerOrder.rotate(new Rating<byte[]>(starttermHash, 0));
|
||||
return new MergeIterator<Rating<byte[]>>(
|
||||
this.ram.referenceCountIterator(starttermHash, rot),
|
||||
new MergeIterator<Rating<byte[]>>(
|
||||
this.ram.referenceCountIterator(starttermHash, false),
|
||||
this.array.referenceCountIterator(starttermHash, false),
|
||||
containerOrder,
|
||||
ReferenceContainer.containerMergeMethod,
|
||||
true),
|
||||
containerOrder,
|
||||
ReferenceContainer.containerMergeMethod,
|
||||
true);
|
||||
}
|
||||
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] starttermHash, final boolean rot) {
|
||||
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, this.ram.rowdef().getOrdering().clone());
|
||||
containerOrder.rotate(new ReferenceContainer<ReferenceType>(this.factory, starttermHash));
|
||||
return new MergeIterator<ReferenceContainer<ReferenceType>>(
|
||||
this.ram.references(starttermHash, rot),
|
||||
this.ram.referenceContainerIterator(starttermHash, rot),
|
||||
new MergeIterator<ReferenceContainer<ReferenceType>>(
|
||||
this.ram.references(starttermHash, false),
|
||||
this.ram.referenceContainerIterator(starttermHash, false),
|
||||
this.array.referenceContainerIterator(starttermHash, false),
|
||||
containerOrder,
|
||||
ReferenceContainer.containerMergeMethod,
|
||||
|
@ -466,14 +484,14 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
|
|||
true);
|
||||
}
|
||||
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startTermHash, final boolean rot, final boolean ram) {
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] startTermHash, final boolean rot, final boolean ram) {
|
||||
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(this.factory, this.ram.rowdef().getOrdering().clone());
|
||||
containerOrder.rotate(new ReferenceContainer<ReferenceType>(this.factory, startTermHash));
|
||||
if (ram) {
|
||||
return this.ram.references(startTermHash, rot);
|
||||
return this.ram.referenceContainerIterator(startTermHash, rot);
|
||||
}
|
||||
return new MergeIterator<ReferenceContainer<ReferenceType>>(
|
||||
this.ram.references(startTermHash, false),
|
||||
this.ram.referenceContainerIterator(startTermHash, false),
|
||||
this.array.referenceContainerIterator(startTermHash, false),
|
||||
containerOrder,
|
||||
ReferenceContainer.containerMergeMethod,
|
||||
|
|
|
@ -35,7 +35,7 @@ public interface IndexReader<ReferenceType extends Reference> {
|
|||
public int size();
|
||||
public boolean has(byte[] wordHash); // should only be used if in case that true is returned the getContainer is NOT called
|
||||
public ReferenceContainer<ReferenceType> get(byte[] wordHash, HandleSet urlselection);
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> references(byte[] startWordHash, boolean rot);
|
||||
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(byte[] startWordHash, boolean rot);
|
||||
public void close();
|
||||
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.io.IOException;
|
|||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.yacy.cora.ranking.Rating;
|
||||
import net.yacy.kelondro.blob.ArrayStack;
|
||||
import net.yacy.kelondro.blob.BLOB;
|
||||
import net.yacy.kelondro.index.HandleMap;
|
||||
|
@ -62,7 +63,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
final ReferenceFactory<ReferenceType> factory,
|
||||
final ByteOrder termOrder,
|
||||
final int termSize,
|
||||
IODispatcher merger) throws IOException {
|
||||
final IODispatcher merger) throws IOException {
|
||||
this.factory = factory;
|
||||
this.array = new ArrayStack(
|
||||
heapLocation,
|
||||
|
@ -84,7 +85,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
}
|
||||
|
||||
public long mem() {
|
||||
return array.mem();
|
||||
return this.array.mem();
|
||||
}
|
||||
|
||||
public int[] sizes() {
|
||||
|
@ -99,7 +100,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
return this.array.newBLOB(new Date());
|
||||
}
|
||||
|
||||
public void mountBLOBFile(File location) throws IOException {
|
||||
public void mountBLOBFile(final File location) throws IOException {
|
||||
this.array.mountBLOB(location, false);
|
||||
}
|
||||
|
||||
|
@ -116,7 +117,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] startWordHash, final boolean rot) {
|
||||
try {
|
||||
return new ReferenceContainerIterator(startWordHash, rot);
|
||||
} catch (IOException e) {
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
|
@ -134,14 +135,14 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
|
||||
public ReferenceContainerIterator(final byte[] startWordHash, final boolean rot) throws IOException {
|
||||
this.rot = rot;
|
||||
this.iterator = array.keys(true, startWordHash);
|
||||
this.iterator = ReferenceContainerArray.this.array.keys(true, startWordHash);
|
||||
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
|
||||
}
|
||||
|
||||
public ReferenceContainerIterator clone(final Object secondWordHash) {
|
||||
try {
|
||||
return new ReferenceContainerIterator((byte[]) secondWordHash, rot);
|
||||
} catch (IOException e) {
|
||||
return new ReferenceContainerIterator((byte[]) secondWordHash, this.rot);
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
|
@ -149,32 +150,32 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
|
||||
public boolean hasNext() {
|
||||
if (this.iterator == null) return false;
|
||||
if (rot) return true;
|
||||
return iterator.hasNext();
|
||||
if (this.rot) return true;
|
||||
return this.iterator.hasNext();
|
||||
}
|
||||
|
||||
public ReferenceContainer<ReferenceType> next() {
|
||||
if (iterator.hasNext()) try {
|
||||
return get(iterator.next());
|
||||
} catch (Exception e) {
|
||||
if (this.iterator.hasNext()) try {
|
||||
return get(this.iterator.next());
|
||||
} catch (final Exception e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
// rotation iteration
|
||||
if (!rot) {
|
||||
if (!this.rot) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
iterator = array.keys(true, null);
|
||||
return get(iterator.next());
|
||||
} catch (Exception e) {
|
||||
this.iterator = ReferenceContainerArray.this.array.keys(true, null);
|
||||
return get(this.iterator.next());
|
||||
} catch (final Exception e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
iterator.remove();
|
||||
this.iterator.remove();
|
||||
}
|
||||
|
||||
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
|
||||
|
@ -183,6 +184,79 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* return an iterator object that counts the number of references in indexContainers
|
||||
* the startWordHash may be null to iterate all from the beginning
|
||||
* @throws IOException
|
||||
*/
|
||||
public CloneableIterator<Rating<byte[]>> referenceCountIterator(final byte[] startWordHash, final boolean rot) {
|
||||
try {
|
||||
return new ReferenceCountIterator(startWordHash, rot);
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public class ReferenceCountIterator implements CloneableIterator<Rating<byte[]>>, Iterable<Rating<byte[]>> {
|
||||
|
||||
private final boolean rot;
|
||||
protected CloneableIterator<byte[]> iterator;
|
||||
|
||||
public ReferenceCountIterator(final byte[] startWordHash, final boolean rot) throws IOException {
|
||||
this.rot = rot;
|
||||
this.iterator = ReferenceContainerArray.this.array.keys(true, startWordHash);
|
||||
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
|
||||
}
|
||||
|
||||
public ReferenceCountIterator clone(final Object secondWordHash) {
|
||||
try {
|
||||
return new ReferenceCountIterator((byte[]) secondWordHash, this.rot);
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if (this.iterator == null) return false;
|
||||
if (this.rot) return true;
|
||||
return this.iterator.hasNext();
|
||||
}
|
||||
|
||||
public Rating<byte[]> next() {
|
||||
byte[] reference;
|
||||
if (this.iterator.hasNext()) try {
|
||||
reference = this.iterator.next();
|
||||
return new Rating<byte[]>(reference, count(reference));
|
||||
} catch (final Exception e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
// rotation iteration
|
||||
if (!this.rot) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
this.iterator = ReferenceContainerArray.this.array.keys(true, null);
|
||||
reference = this.iterator.next();
|
||||
return new Rating<byte[]>(reference, count(reference));
|
||||
} catch (final Exception e) {
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
this.iterator.remove();
|
||||
}
|
||||
|
||||
public Iterator<Rating<byte[]>> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* test if a given key is in the heap
|
||||
* this works with heaps in write- and read-mode
|
||||
|
@ -202,10 +276,10 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
* @throws RowSpaceExceededException
|
||||
*/
|
||||
public ReferenceContainer<ReferenceType> get(final byte[] termHash) throws IOException, RowSpaceExceededException {
|
||||
long timeout = System.currentTimeMillis() + 3000;
|
||||
Iterator<byte[]> entries = this.array.getAll(termHash).iterator();
|
||||
final long timeout = System.currentTimeMillis() + 3000;
|
||||
final Iterator<byte[]> entries = this.array.getAll(termHash).iterator();
|
||||
if (entries == null || !entries.hasNext()) return null;
|
||||
byte[] a = entries.next();
|
||||
final byte[] a = entries.next();
|
||||
int k = 1;
|
||||
ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(a, this.factory.getRow()));
|
||||
if (System.currentTimeMillis() > timeout) {
|
||||
|
@ -224,10 +298,10 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
}
|
||||
|
||||
public int count(final byte[] termHash) throws IOException {
|
||||
long timeout = System.currentTimeMillis() + 3000;
|
||||
Iterator<Long> entries = this.array.lengthAll(termHash).iterator();
|
||||
final long timeout = System.currentTimeMillis() + 3000;
|
||||
final Iterator<Long> entries = this.array.lengthAll(termHash).iterator();
|
||||
if (entries == null || !entries.hasNext()) return 0;
|
||||
Long a = entries.next();
|
||||
final Long a = entries.next();
|
||||
int k = 1;
|
||||
int c = RowSet.importRowCount(a, this.factory.getRow());
|
||||
assert c >= 0;
|
||||
|
@ -268,11 +342,11 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
*/
|
||||
public void delete(final byte[] termHash) throws IOException {
|
||||
// returns the index that had been deleted
|
||||
array.delete(termHash);
|
||||
this.array.delete(termHash);
|
||||
}
|
||||
|
||||
public int reduce(final byte[] termHash, ContainerReducer<ReferenceType> reducer) throws IOException, RowSpaceExceededException {
|
||||
return array.reduce(termHash, new BLOBReducer(termHash, reducer));
|
||||
public int reduce(final byte[] termHash, final ContainerReducer<ReferenceType> reducer) throws IOException, RowSpaceExceededException {
|
||||
return this.array.reduce(termHash, new BLOBReducer(termHash, reducer));
|
||||
}
|
||||
|
||||
public class BLOBReducer implements BLOB.Reducer {
|
||||
|
@ -280,16 +354,16 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
ContainerReducer<ReferenceType> rewriter;
|
||||
byte[] wordHash;
|
||||
|
||||
public BLOBReducer(byte[] wordHash, ContainerReducer<ReferenceType> rewriter) {
|
||||
public BLOBReducer(final byte[] wordHash, final ContainerReducer<ReferenceType> rewriter) {
|
||||
this.rewriter = rewriter;
|
||||
this.wordHash = wordHash;
|
||||
}
|
||||
|
||||
public byte[] rewrite(byte[] b) throws RowSpaceExceededException {
|
||||
public byte[] rewrite(final byte[] b) throws RowSpaceExceededException {
|
||||
if (b == null) return null;
|
||||
ReferenceContainer<ReferenceType> c = rewriter.reduce(new ReferenceContainer<ReferenceType>(factory, this.wordHash, RowSet.importRowSet(b, factory.getRow())));
|
||||
final ReferenceContainer<ReferenceType> c = this.rewriter.reduce(new ReferenceContainer<ReferenceType>(ReferenceContainerArray.this.factory, this.wordHash, RowSet.importRowSet(b, ReferenceContainerArray.this.factory.getRow())));
|
||||
if (c == null) return null;
|
||||
byte bb[] = c.exportCollection();
|
||||
final byte bb[] = c.exportCollection();
|
||||
assert bb.length <= b.length;
|
||||
return bb;
|
||||
}
|
||||
|
@ -305,43 +379,43 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
return this.array.entries();
|
||||
}
|
||||
|
||||
public boolean shrink(long targetFileSize, long maxFileSize) {
|
||||
public boolean shrink(final long targetFileSize, final long maxFileSize) {
|
||||
if (this.array.entries() < 2) return false;
|
||||
boolean donesomething = false;
|
||||
|
||||
// first try to merge small files that match
|
||||
while (this.merger.queueLength() < 3 || this.array.entries() >= 50) {
|
||||
File[] ff = this.array.unmountBestMatch(2.0f, targetFileSize);
|
||||
final File[] ff = this.array.unmountBestMatch(2.0f, targetFileSize);
|
||||
if (ff == null) break;
|
||||
Log.logInfo("RICELL-shrink1", "unmountBestMatch(2.0, " + targetFileSize + ")");
|
||||
merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
|
||||
this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
|
||||
donesomething = true;
|
||||
}
|
||||
|
||||
// then try to merge simply any small file
|
||||
while (this.merger.queueLength() < 2) {
|
||||
File[] ff = this.array.unmountSmallest(targetFileSize);
|
||||
final File[] ff = this.array.unmountSmallest(targetFileSize);
|
||||
if (ff == null) break;
|
||||
Log.logInfo("RICELL-shrink2", "unmountSmallest(" + targetFileSize + ")");
|
||||
merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
|
||||
this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
|
||||
donesomething = true;
|
||||
}
|
||||
|
||||
// if there is no small file, then merge matching files up to limit
|
||||
while (this.merger.queueLength() < 1) {
|
||||
File[] ff = this.array.unmountBestMatch(2.0f, maxFileSize);
|
||||
final File[] ff = this.array.unmountBestMatch(2.0f, maxFileSize);
|
||||
if (ff == null) break;
|
||||
Log.logInfo("RICELL-shrink3", "unmountBestMatch(2.0, " + maxFileSize + ")");
|
||||
merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
|
||||
this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile());
|
||||
donesomething = true;
|
||||
}
|
||||
|
||||
// rewrite old files (hack from sixcooler, see http://forum.yacy-websuche.de/viewtopic.php?p=15004#p15004)
|
||||
while (this.merger.queueLength() < 1) {
|
||||
File ff = this.array.unmountOldest();
|
||||
final File ff = this.array.unmountOldest();
|
||||
if (ff == null) break;
|
||||
Log.logInfo("RICELL-shrink4/rewrite", "unmountOldest()");
|
||||
merger.merge(ff, null, this.factory, this.array, newContainerBLOBFile());
|
||||
this.merger.merge(ff, null, this.factory, this.array, newContainerBLOBFile());
|
||||
donesomething = true;
|
||||
}
|
||||
|
||||
|
@ -355,13 +429,13 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
final Row payloadrow) throws IOException, RowSpaceExceededException {
|
||||
|
||||
System.out.println("CELL REFERENCE COLLECTION startup");
|
||||
HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath());
|
||||
String[] files = heapLocation.list();
|
||||
for (String f: files) {
|
||||
final HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath());
|
||||
final String[] files = heapLocation.list();
|
||||
for (final String f: files) {
|
||||
if (f.length() < 22 || !f.startsWith("text.index") || !f.endsWith(".blob")) continue;
|
||||
File fl = new File(heapLocation, f);
|
||||
final File fl = new File(heapLocation, f);
|
||||
System.out.println("CELL REFERENCE COLLECTION opening blob " + fl);
|
||||
CloneableIterator<ReferenceContainer<ReferenceType>> ei = new ReferenceIterator<ReferenceType>(fl, factory);
|
||||
final CloneableIterator<ReferenceContainer<ReferenceType>> ei = new ReferenceIterator<ReferenceType>(fl, factory);
|
||||
|
||||
ReferenceContainer<ReferenceType> container;
|
||||
final long start = System.currentTimeMillis();
|
||||
|
@ -372,7 +446,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
|
|||
while (ei.hasNext()) {
|
||||
container = ei.next();
|
||||
if (container == null) continue;
|
||||
Iterator<ReferenceType> refi = container.entries();
|
||||
final Iterator<ReferenceType> refi = container.entries();
|
||||
while (refi.hasNext()) {
|
||||
reference = refi.next();
|
||||
if (reference == null) continue;
|
||||
|
|
|
@ -36,6 +36,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import net.yacy.cora.ranking.Rating;
|
||||
import net.yacy.kelondro.blob.HeapWriter;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
import net.yacy.kelondro.index.Row;
|
||||
|
@ -177,6 +178,17 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
|
|||
return cachecopy;
|
||||
}
|
||||
|
||||
protected List<Rating<ByteArray>> ratingList() {
|
||||
final List<Rating<ByteArray>> list = new ArrayList<Rating<ByteArray>>(this.cache.size());
|
||||
synchronized (this.cache) {
|
||||
for (final Map.Entry<ByteArray, ReferenceContainer<ReferenceType>> entry: this.cache.entrySet()) {
|
||||
if (entry.getValue() != null && entry.getValue().getTermHash() != null) list.add(new Rating<ByteArray>(entry.getKey(), entry.getValue().size()));
|
||||
}
|
||||
}
|
||||
Collections.sort(list, new Rating.ObjectComparator<ByteArray>());
|
||||
return list;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return (this.cache == null) ? 0 : this.cache.size();
|
||||
}
|
||||
|
@ -195,26 +207,24 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
|
|||
return max;
|
||||
}
|
||||
|
||||
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
|
||||
return referenceContainerIterator(null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* return an iterator object that creates top-level-clones of the indexContainers
|
||||
* in the cache, so that manipulations of the iterated objects do not change
|
||||
* objects in the cache.
|
||||
*/
|
||||
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startWordHash, final boolean rot) {
|
||||
return new heapCacheIterator(startWordHash, rot);
|
||||
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(final byte[] startWordHash, final boolean rot) {
|
||||
return new ReferenceContainerIterator(startWordHash, rot);
|
||||
}
|
||||
|
||||
|
||||
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
|
||||
return references(null, false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* cache iterator: iterates objects within the heap cache. This can only be used
|
||||
* for write-enabled heaps, read-only heaps do not have a heap cache
|
||||
*/
|
||||
public class heapCacheIterator implements CloneableIterator<ReferenceContainer<ReferenceType>>, Iterable<ReferenceContainer<ReferenceType>> {
|
||||
public class ReferenceContainerIterator implements CloneableIterator<ReferenceContainer<ReferenceType>>, Iterable<ReferenceContainer<ReferenceType>> {
|
||||
|
||||
// this class exists, because the wCache cannot be iterated with rotation
|
||||
// and because every indexContainer Object that is iterated must be returned as top-level-clone
|
||||
|
@ -226,7 +236,7 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
|
|||
private int p;
|
||||
private byte[] latestTermHash;
|
||||
|
||||
public heapCacheIterator(byte[] startWordHash, final boolean rot) {
|
||||
public ReferenceContainerIterator(byte[] startWordHash, final boolean rot) {
|
||||
this.rot = rot;
|
||||
if (startWordHash != null && startWordHash.length == 0) startWordHash = null;
|
||||
this.cachecopy = sortedClone();
|
||||
|
@ -242,8 +252,8 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
|
|||
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
|
||||
}
|
||||
|
||||
public heapCacheIterator clone(final Object secondWordHash) {
|
||||
return new heapCacheIterator((byte[]) secondWordHash, this.rot);
|
||||
public ReferenceContainerIterator clone(final Object secondWordHash) {
|
||||
return new ReferenceContainerIterator((byte[]) secondWordHash, this.rot);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
@ -289,6 +299,75 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
public CloneableIterator<Rating<byte[]>> referenceCountIterator(final byte[] startHash, final boolean rot) {
|
||||
return new ReferenceCountIterator(startHash, rot);
|
||||
}
|
||||
|
||||
/**
|
||||
* cache iterator: iterates objects within the heap cache. This can only be used
|
||||
* for write-enabled heaps, read-only heaps do not have a heap cache
|
||||
*/
|
||||
public class ReferenceCountIterator implements CloneableIterator<Rating<byte[]>>, Iterable<Rating<byte[]>> {
|
||||
|
||||
private final boolean rot;
|
||||
private final List<Rating<ByteArray>> cachecounts;
|
||||
private int p;
|
||||
private byte[] latestTermHash;
|
||||
|
||||
public ReferenceCountIterator(byte[] startWordHash, final boolean rot) {
|
||||
this.rot = rot;
|
||||
if (startWordHash != null && startWordHash.length == 0) startWordHash = null;
|
||||
this.cachecounts = ratingList();
|
||||
assert this.cachecounts != null;
|
||||
assert ReferenceContainerCache.this.termOrder != null;
|
||||
this.p = 0;
|
||||
if (startWordHash != null) {
|
||||
while ( this.p < this.cachecounts.size() &&
|
||||
ReferenceContainerCache.this.termOrder.compare(this.cachecounts.get(this.p).getObject().asBytes(), startWordHash) < 0
|
||||
) this.p++;
|
||||
}
|
||||
this.latestTermHash = null;
|
||||
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
|
||||
}
|
||||
|
||||
public ReferenceCountIterator clone(final Object secondWordHash) {
|
||||
return new ReferenceCountIterator((byte[]) secondWordHash, this.rot);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if (this.rot) return this.cachecounts.size() > 0;
|
||||
return this.p < this.cachecounts.size();
|
||||
}
|
||||
|
||||
public Rating<byte[]> next() {
|
||||
if (this.p < this.cachecounts.size()) {
|
||||
final Rating<ByteArray> c = this.cachecounts.get(this.p++);
|
||||
this.latestTermHash = c.getObject().asBytes();
|
||||
return new Rating<byte[]>(c.getObject().asBytes(), c.getScore());
|
||||
}
|
||||
// rotation iteration
|
||||
if (!this.rot) {
|
||||
return null;
|
||||
}
|
||||
if (this.cachecounts.isEmpty()) return null;
|
||||
this.p = 0;
|
||||
final Rating<ByteArray> c = this.cachecounts.get(this.p++);
|
||||
this.latestTermHash = c.getObject().asBytes();
|
||||
return new Rating<byte[]>(c.getObject().asBytes(), c.getScore());
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
System.arraycopy(this.cachecounts, this.p, this.cachecounts, this.p - 1, this.cachecounts.size() - this.p);
|
||||
ReferenceContainerCache.this.cache.remove(new ByteArray(this.latestTermHash));
|
||||
}
|
||||
|
||||
public Iterator<Rating<byte[]>> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* test if a given key is in the heap
|
||||
* this works with heaps in write- and read-mode
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
|
||||
package net.yacy.kelondro.rwi;
|
||||
|
||||
import net.yacy.kelondro.order.AbstractOrder;
|
||||
import net.yacy.kelondro.order.Order;
|
||||
import net.yacy.cora.ranking.AbstractOrder;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
|
||||
public class ReferenceContainerOrder<ReferenceType extends Reference> extends AbstractOrder<ReferenceContainer<ReferenceType>> implements Order<ReferenceContainer<ReferenceType>>, Cloneable {
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ import java.util.concurrent.ThreadPoolExecutor;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import net.yacy.cora.date.GenericFormatter;
|
||||
import net.yacy.cora.ranking.Order;
|
||||
import net.yacy.kelondro.blob.ArrayStack;
|
||||
import net.yacy.kelondro.index.Cache;
|
||||
import net.yacy.kelondro.index.HandleSet;
|
||||
|
@ -55,7 +56,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
|
|||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.order.CloneableIterator;
|
||||
import net.yacy.kelondro.order.MergeIterator;
|
||||
import net.yacy.kelondro.order.Order;
|
||||
import net.yacy.kelondro.order.StackIterator;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
import net.yacy.kelondro.util.NamePrefixThreadFactory;
|
||||
|
|
|
@ -648,7 +648,7 @@ public final class yacy {
|
|||
new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
|
||||
10000,
|
||||
(long) Integer.MAX_VALUE, false, false);
|
||||
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().references("AAAAAAAAAAAA".getBytes(), false, false);
|
||||
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().referenceContainerIterator("AAAAAAAAAAAA".getBytes(), false, false);
|
||||
|
||||
long urlCounter = 0, wordCounter = 0;
|
||||
long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0;
|
||||
|
@ -828,7 +828,7 @@ public final class yacy {
|
|||
new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
|
||||
10000,
|
||||
(long) Integer.MAX_VALUE, false, false);
|
||||
indexContainerIterator = WordIndex.termIndex().references(wordChunkStartHash.getBytes(), false, false);
|
||||
indexContainerIterator = WordIndex.termIndex().referenceContainerIterator(wordChunkStartHash.getBytes(), false, false);
|
||||
}
|
||||
int counter = 0;
|
||||
ReferenceContainer<WordReference> container = null;
|
||||
|
|
Loading…
Reference in New Issue
Block a user