yacy_search_server/source/de/anomic/kelondro/text/Index.java
orbiter c8624903c6 full redesign of index access data model:
terms (words) are not any more retrieved by their word hash string, but by a byte[] containing the word hash.
this has strong advantages when RWIs are sorted in the ReferenceContainer Cache and compared with the sun.java TreeMap method, which needed getBytes() and new String() transformations before.
Many thousands of such conversions are now omitted every second, which increases the indexing speed by a factor of two.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5812 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-04-16 15:29:00 +00:00

167 lines
5.5 KiB
Java

// ReverseIndex.java
// -----------------------------
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 6.5.2005 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.text;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
public interface Index <ReferenceType extends Reference> {
/**
* add references to the reverse index
* if no references to the word are stored, the new Entries are added,
* if there are already references to the word that is denoted with the
* reference to be stored, then the old and the new references are merged
* @param newEntries the References to be merged with existing references
* @throws IOException
*/
public void add(ReferenceContainer<ReferenceType> newEntries) throws IOException;
/**
* add a single reference to the reverse index
* if no references to the word are stored, the a new entry is added,
* if there are already references to the word hash stored,
* then the old and the new references are merged
* @param termHash
* @param entry
* @throws IOException
*/
public void add(final byte[] termHash, final ReferenceType entry) throws IOException;
/**
* check if there are references stored to the given word hash
* @param termHash
* @return true if references exist, false if not
*/
public boolean has(final byte[] termHash); // should only be used if in case that true is returned the getContainer is NOT called
/**
* count the number of references for the given word
* do not use this method to check the existence of a reference by comparing
* the result with zero, use hasReferences instead.
* @param termHash
* @return the number of references to the given word
*/
public int count(final byte[] termHash);
/**
* get the references to a given word.
* if referenceselection is not null, then all url references which are not
* in referenceselection are removed from the container
* @param termHash
* @param referenceselection
* @return the references
* @throws IOException
*/
public ReferenceContainer<ReferenceType> get(byte[] termHash, Set<String> referenceselection) throws IOException;
/**
* delete all references for a word
* @param termHash
* @return the deleted references
* @throws IOException
*/
public ReferenceContainer<ReferenceType> delete(byte[] termHash) throws IOException;
/**
* remove a specific reference entry
* @param termHash
* @param referenceHash the key for the reference entry to be removed
* @return
* @throws IOException
*/
public boolean remove(byte[] termHash, String referenceHash) throws IOException;
/**
* remove a set of reference entries for a given word
* @param termHash the key for the references
* @param referenceHash the reference entry keys
* @return
* @throws IOException
*/
public int remove(final byte[] termHash, Set<String> referenceHashes) throws IOException;
public int remove(final TreeSet<byte[]> termHashes, final String urlHash) throws IOException;
public void remove(final TreeSet<byte[]> termHashes, final Set<String> urlHashes) throws IOException;
/**
* iterate all references from the beginning of a specific word hash
* @param startHash
* @param rot if true, then rotate at the end to the beginning
* @param ram
* @return
* @throws IOException
*/
public CloneableIterator<ReferenceContainer<ReferenceType>> references(
byte[] startHash,
boolean rot
) throws IOException;
public TreeSet<ReferenceContainer<ReferenceType>> references(
byte[] startHash,
boolean rot,
int count
) throws IOException;
/**
* delete all references entries
* @throws IOException
*/
public void clear() throws IOException;
/**
* close the reverse index
*/
public void close();
/**
* the number of all references
* @return the nnumber of all references
*/
public int size();
/**
* calculate needed memory
* @return the memory needed to operate the object
*/
public int minMem();
/**
* return the order that is used for the storage of the word hashes
* @return
*/
public ByteOrder ordering();
}