yacy_search_server/source/net/yacy/kelondro/rwi/Index.java
Michael Peter Christen a1fe65b115 performance hacks
2012-06-05 12:06:26 +02:00

227 lines
7.9 KiB
Java

// ReverseIndex.java
// -----------------------------
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 6.5.2005 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.rwi;
import java.io.IOException;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.sorting.Rating;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
public interface Index <ReferenceType extends Reference> extends Iterable<ReferenceContainer<ReferenceType>> {
/**
* every index entry is made for a term which has a fixed size
* @return the size of the term
*/
public int termKeyLength();
/**
* merge this index with another index
* @param otherIndex
*/
public void merge(Index<ReferenceType> otherIndex) throws IOException, RowSpaceExceededException;
/**
* add references to the reverse index
* if no references to the word are stored, the new Entries are added,
* if there are already references to the word that is denoted with the
* reference to be stored, then the old and the new references are merged
* @param newEntries the References to be merged with existing references
* @throws IOException
* @throws RowSpaceExceededException
*/
public void add(ReferenceContainer<ReferenceType> newEntries) throws IOException, RowSpaceExceededException;
/**
* add a single reference to the reverse index
* if no references to the word are stored, the a new entry is added,
* if there are already references to the word hash stored,
* then the old and the new references are merged
* @param termHash
* @param entry
* @throws IOException
* @throws RowSpaceExceededException
*/
public void add(final byte[] termHash, final ReferenceType entry) throws IOException, RowSpaceExceededException;
/**
* check if there are references stored to the given word hash
* @param termHash
* @return true if references exist, false if not
*/
public boolean has(final byte[] termHash); // should only be used if in case that true is returned the getContainer is NOT called
/**
* count the number of references for the given word
* do not use this method to check the existence of a reference by comparing
* the result with zero, use hasReferences instead.
* @param termHash
* @return the number of references to the given word
*/
public int count(final byte[] termHash);
/**
* get the references to a given word.
* if referenceselection is not null, then all url references which are not
* in referenceselection are removed from the container
* @param termHash
* @param referenceselection
* @return the references
* @throws IOException
*/
public ReferenceContainer<ReferenceType> get(byte[] termHash, HandleSet referenceselection) throws IOException;
/**
* remove all references for a word
* @param termHash
* @return the deleted references
* @throws IOException
*/
public ReferenceContainer<ReferenceType> remove(byte[] termHash) throws IOException;
/**
* delete all references for a word
* the difference to 'remove' is, that the removed element is not returned
* @param termHash
* @throws IOException
*/
public void delete(byte[] termHash) throws IOException;
/**
* remove a specific reference entry
* @param termHash
* @param referenceHash the key for the reference entry to be removed
* @return
* @throws IOException
*/
public boolean remove(byte[] termHash, byte[] referenceHash) throws IOException;
public void removeDelayed(byte[] termHash, byte[] referenceHash) throws IOException;
/**
* remove a set of reference entries for a given word
* @param termHash the key for the references
* @param referenceHash the reference entry keys
* @return
* @throws IOException
*/
public int remove(final byte[] termHash, HandleSet referenceHashes) throws IOException;
public void removeDelayed(final byte[] termHash, HandleSet referenceHashes) throws IOException;
public int remove(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException;
public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException;
public void removeDelayed() throws IOException;
/**
* iterate all references from the beginning of a specific word hash
* @param startHash
* @param rot if true, then rotate at the end to the beginning
* @param ram
* @return
* @throws IOException
*/
public CloneableIterator<Rating<byte[]>> referenceCountIterator(
byte[] startHash,
boolean rot,
boolean excludePrivate
) throws IOException;
/**
* iterate all references from the beginning of a specific word hash
* @param startHash
* @param rot if true, then rotate at the end to the beginning
* @param ram
* @return
* @throws IOException
*/
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(
byte[] startHash,
boolean rot,
boolean excludePrivate
) throws IOException;
public TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(
byte[] startHash,
boolean rot,
boolean excludePrivate,
int count
) throws IOException;
/**
* collect containers for given word hashes. This collection stops if a single container does not contain any references.
* In that case only a empty result is returned.
* @param wordHashes
* @param urlselection
* @return map of wordhash:indexContainer
*/
public TreeMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection);
/**
* delete all references entries
* @throws IOException
*/
public void clear() throws IOException;
/**
* close the reverse index
*/
public void close();
/**
* the number of all references
* @return the nnumber of all references
*/
public int size();
/**
* calculate needed memory
* @return the memory needed to operate the object
*/
public int minMem();
/**
* return the order that is used for the storage of the word hashes
* @return
*/
public ByteOrder termKeyOrdering();
/**
* ask for the Row that is used to construct one reference
* @return
*/
public Row referenceRow();
}