yacy_search_server/source/net/yacy/kelondro/blob/MapColumnIndex.java
Michael Peter Christen 8219a445f3 refactoring
2012-09-21 16:46:57 +02:00

176 lines
7.5 KiB
Java

/**
* MapColumnIndex
* Copyright 2012 by Michael Christen
* First released 01.02.2012 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.blob;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.order.NaturalOrder;
/**
 * A secondary index over a table whose rows are maps from column names to values.
 * For every indexed column name this class keeps a mapping from the (lower-cased) column value
 * to the primary keys of all rows that carry this value in that column.
 *
 * Column values are matched case-insensitively: they are lower-cased both when they are stored
 * and when they are looked up.
 *
 * All public instance methods are synchronized on this object.
 */
public class MapColumnIndex implements Serializable {

    private static final long serialVersionUID=-424741536889467566L;

    /** column name -> (lower-cased column value -> primary keys of the rows having that value) */
    private final Map<String, Map<String, Collection<byte[]>>> index;

    /**
     * create an empty index which does not track any column yet
     */
    public MapColumnIndex() {
        this.index = new HashMap<String, Map<String, Collection<byte[]>>>();
    }

    /**
     * bring a column value into the form that is used as key inside the index.
     * Every store and every lookup must use this method, otherwise entries that were written
     * by one method cannot be found by another one.
     * @param value a non-null column value
     * @return the lower-cased value
     */
    private static String normalize(final String value) {
        // lower-case is needed for seedDB.lookupByName; the default-locale variant is kept
        // deliberately so that the result matches callers that lower-case their query themselves
        return value.toLowerCase();
    }

    /**
     * get the primary keys of all rows where the column whereKey has the value isValue
     * @param whereKey the name of the column
     * @param isValue the wanted value of the column, compared case-insensitively
     * @return a snapshot of the matching primary keys; empty if no row matches or if isValue is null.
     *         The returned collection is a copy: it is not affected by later changes of the index
     *         and can be iterated without holding the lock of this object.
     * @throws UnsupportedOperationException if no index was created for whereKey (see init)
     */
    public synchronized Collection<byte[]> getIndex(final String whereKey, final String isValue) throws UnsupportedOperationException {
        final Map<String, Collection<byte[]>> references = this.index.get(whereKey);
        if (references == null) throw new UnsupportedOperationException("no index for column '" + whereKey + "'");
        if (isValue == null) return new ArrayList<byte[]>(0); // null values are never indexed
        final Collection<byte[]> indexes = references.get(normalize(isValue));
        if (indexes == null) return new ArrayList<byte[]>(0); // empty collection
        // do not hand out the internal list: it is modified by update/delete under the lock
        return new ArrayList<byte[]>(indexes);
    }

    /**
     * remove the indexes of all columns
     */
    public synchronized void clear() {
        this.index.clear();
    }

    /**
     * create a full index for the whereKey; an already existing index for whereKey is replaced
     * @param whereKey the name of the column that shall be indexed
     * @param isValue not used by this method
     * @param table an iterator over all rows of the table as (primary key, row) entries
     */
    public synchronized void init(final String whereKey, final String isValue, final Iterator<Map.Entry<byte[], Map<String, String>>> table) {
        final Map<String, Collection<byte[]>> valueIdxMap = new HashMap<String, Collection<byte[]>>();
        this.index.put(whereKey, valueIdxMap);
        while (table.hasNext()) {
            final Map.Entry<byte[], Map<String, String>> line = table.next();
            final String value = line.getValue().get(whereKey);
            if (value == null) continue; // we don't need to remember that
            indexupdate(line.getKey(), valueIdxMap, normalize(value));
        }
    }

    /**
     * update an index entry.
     * This only adds references: the primary key is not removed from values under which it was
     * indexed before. If the values of an already indexed row have changed, call delete first.
     * @param primarykey the primary key for the row that is updated
     * @param row the row that was updated (a mapping from column names to values)
     */
    public synchronized void update(final byte[] primarykey, final Map<String, String> row) {
        for (final Map.Entry<String, Map<String, Collection<byte[]>>> entry: this.index.entrySet()) {
            // create an index for all columns that we track
            final String value = row.get(entry.getKey());
            if (value == null) continue; // we don't need to remember that
            // must be normalized exactly like in init, otherwise the entry cannot be found again
            indexupdate(primarykey, entry.getValue(), normalize(value));
        }
    }

    /**
     * add a primary key to the key collection of one value; nothing happens if it is already there
     * @param primarykey the primary key to be added
     * @param valueIdxMap the index of a single column
     * @param value the already normalized column value
     */
    private static void indexupdate(final byte[] primarykey, final Map<String, Collection<byte[]>> valueIdxMap, final String value) {
        Collection<byte[]> indexes = valueIdxMap.get(value);
        if (indexes == null) {
            // create a new index entry
            indexes = new ArrayList<byte[]>(1);
            indexes.add(primarykey);
            valueIdxMap.put(value, indexes);
        } else {
            // update the existing index entry
            // check if value already exist
            if (!net.yacy.kelondro.util.ByteBuffer.contains(indexes, primarykey)) {
                indexes.add(primarykey);
            }
        }
    }

    /**
     * delete all references to the primary key
     * @param primarykey the primary key of the row that shall be removed from all column indexes
     */
    public synchronized void delete(final byte[] primarykey) {
        for (final Map.Entry<String, Map<String, Collection<byte[]>>> entry: this.index.entrySet()) {
            // we must check all index reference maps: iterate over entries
            indexdelete(primarykey, entry.getValue());
        }
    }

    /**
     * remove a primary key from every key collection of one column index
     * and drop the values that have no primary key left afterwards
     * @param index the primary key to be removed
     * @param valueIdxMap the index of a single column
     */
    private static void indexdelete(final byte[] index, final Map<String, Collection<byte[]>> valueIdxMap) {
        final Iterator<Map.Entry<String, Collection<byte[]>>> i = valueIdxMap.entrySet().iterator();
        while (i.hasNext()) {
            final Map.Entry<String, Collection<byte[]>> ref = i.next();
            net.yacy.kelondro.util.ByteBuffer.remove(ref.getValue(), index);
            if (ref.getValue().isEmpty()) {
                i.remove(); // removal through the iterator keeps the running iteration valid
            }
        }
    }

    /**
     * demo helper: look up an index and create the index for the column from the table
     * on the fly if it does not exist yet
     * @param idx the index to be queried
     * @param whereKey the name of the column
     * @param isValue the wanted value of the column
     * @param table the complete table, used to build the column index if it is missing
     * @return the primary keys of the matching rows
     */
    private static Collection<byte[]> getIndexWithExceptionHandler(final MapColumnIndex idx, final String whereKey, final String isValue, Map<byte[], Map<String, String>> table) {
        try {
            return idx.getIndex(whereKey, isValue);
        } catch (UnsupportedOperationException e) {
            // the column is not indexed yet: build the index and try again
            idx.init(whereKey, isValue, table.entrySet().iterator());
            return idx.getIndex(whereKey, isValue);
        }
    }

    /**
     * demo helper: print a collection of primary keys to stdout in the form idx{key1, key2}
     * @param index the primary keys to be printed
     */
    private static void printIndex(Collection<byte[]> index) {
        System.out.print("idx{");
        int c = 0;
        for (byte[] a: index) {
            if (c++ != 0) System.out.print(", ");
            System.out.print(ASCII.String(a));
        }
        System.out.print("}");
    }

    /**
     * small demonstration of the index: fills a table, queries some columns and updates one row
     * @param args not used
     */
    public static void main(String[] args) {
        Map<byte[], Map<String, String>> table = new TreeMap<byte[], Map<String, String>>(NaturalOrder.naturalOrder);
        Map<String, String> row;
        row = new HashMap<String, String>(); row.put("a", "1"); row.put("b", "2"); row.put("c", "2"); table.put("line1".getBytes(), row);
        row = new HashMap<String, String>(); row.put("a", "3"); row.put("b", "2"); row.put("c", "4"); table.put("line2".getBytes(), row);
        row = new HashMap<String, String>(); row.put("a", "5"); row.put("b", "2"); row.put("c", "4"); table.put("line3".getBytes(), row);
        row = new HashMap<String, String>(); row.put("a", "6"); row.put("b", "7"); row.put("c", "8"); table.put("line4".getBytes(), row);
        MapColumnIndex idx = new MapColumnIndex();
        System.out.print("colum b, value 2: "); printIndex(getIndexWithExceptionHandler(idx, "b", "2", table)); System.out.println();
        System.out.print("colum c, value 4: "); printIndex(getIndexWithExceptionHandler(idx, "c", "4", table)); System.out.println();
        System.out.print("colum b, value 7: "); printIndex(getIndexWithExceptionHandler(idx, "b", "7", table)); System.out.println();
        System.out.print("colum d, value 0: "); printIndex(getIndexWithExceptionHandler(idx, "d", "0", table)); System.out.println();
        row = new HashMap<String, String>(); row.put("a", "9"); row.put("b", "9"); row.put("c", "4"); table.put("line5".getBytes(), row);
        idx.update("line5".getBytes(), row);
        System.out.print("colum c, value 4: "); printIndex(getIndexWithExceptionHandler(idx, "c", "4", table)); System.out.println();
    }
}