// BEncodedHeap.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 12.01.2010 on http://yacy.net
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 6563 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package net.yacy.kelondro.blob;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.BDecoder;
import net.yacy.kelondro.util.BEncoder;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.BDecoder.BObject;

/**
 * Store a table of properties (instead of fixed-field entries).
 * This is realized using blobs and BEncoded property lists: every row is
 * addressed by a {@code byte[]} primary key and holds a
 * {@code Map<String, byte[]>} that is written to the underlying {@link Heap}
 * as a BEncoded dictionary.
 *
 * Several {@link Map} methods ({@link #containsValue(Object)},
 * {@link #values()}, {@link #entrySet()}) deliberately throw
 * {@link UnsupportedOperationException}; use {@link #iterator()} to walk
 * all rows instead.
 */
public class BEncodedHeap implements Map<byte[], Map<String, byte[]>>, Iterable<Map.Entry<byte[], Map<String, byte[]>>> {

    private Heap table;
    // column names seen so far, in first-seen order; filled by insert/put
    // and lazily by columns()
    private LinkedHashSet<String> columnames;

    /**
     * produce or open a properties table
     * @param location the file
     * @param keylength length of access keys
     * @param ordering ordering on the keys
     * @param buffermax maximum number of lines that shall be buffered for writing
     * @throws IOException
     */
    public BEncodedHeap(
            final File location,
            final int keylength,
            final ByteOrder ordering,
            int buffermax) throws IOException {
        this.table = new Heap(location, keylength, ordering, buffermax);
        this.columnames = new LinkedHashSet<String>();
    }

    /**
     * convenience method to open a properties table
     * using the natural byte order and a write buffer of 100 lines
     * @param location the file
     * @param keylength length of access keys
     * @throws IOException
     */
    public BEncodedHeap(
            final File location,
            final int keylength) throws IOException {
        this(location, keylength, NaturalOrder.naturalOrder, 100);
    }

    /**
     * compute a primary key for a given string:
     * the raw MD5 digest of the string is encoded with the enhanced Base64
     * coder and cut to the key length of this table
     * @param key the string that shall be mapped to a primary key
     * @return the encoded key
     */
    public byte[] encodedKey(String key) {
        return Base64Order.enhancedCoder.encodeSubstring(Digest.encodeMD5Raw(key), this.table.keylength);
    }

    /**
     * Iterator over all rows of a heap file. Every raw blob is decoded
     * with b2m; rows that cannot be decoded are delivered with a null value.
     */
    private static class EntryIter implements Iterator<Map.Entry<byte[], Map<String, byte[]>>> {
        private final HeapReader.entries iter;

        public EntryIter(File location, int keylen) throws IOException {
            this.iter = new HeapReader.entries(location, keylen);
        }

        public boolean hasNext() {
            return this.iter.hasNext();
        }

        public Map.Entry<byte[], Map<String, byte[]>> next() {
            Map.Entry<byte[], byte[]> raw = this.iter.next();
            return new b2mEntry(raw.getKey(), b2m(raw.getValue()));
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * A simple key/value pair: the primary key of a row together with
     * the decoded property map of that row.
     */
    public static class b2mEntry implements Map.Entry<byte[], Map<String, byte[]>> {
        private final byte[] s;
        private Map<String, byte[]> b;

        public b2mEntry(final byte[] s, final Map<String, byte[]> b) {
            this.s = s;
            this.b = b;
        }

        public byte[] getKey() {
            return this.s;
        }

        public Map<String, byte[]> getValue() {
            return this.b;
        }

        /**
         * replaces the value of this entry object only;
         * nothing is written back to any table
         * @return the previous value
         */
        public Map<String, byte[]> setValue(Map<String, byte[]> value) {
            Map<String, byte[]> previous = this.b;
            this.b = value;
            return previous;
        }
    }

    /**
     * decode a BEncoded dictionary into a property map
     * @param b the BEncoded bytes, may be null
     * @return the map of all string-typed dictionary entries, or NULL if
     *         b is null or does not decode to a dictionary
     */
    private static Map<String, byte[]> b2m(byte[] b) {
        if (b == null) return null;
        BObject bobj = new BDecoder(b).parse();
        // defensive: treat an unparseable blob like a non-dictionary blob
        if (bobj == null || bobj.getType() != BDecoder.BType.dictionary) return null;
        Map<String, BObject> decoded = bobj.getMap();
        Map<String, byte[]> m = new HashMap<String, byte[]>();
        for (Map.Entry<String, BObject> entry: decoded.entrySet()) {
            // only string values are taken over; all other types are skipped
            if (entry.getValue().getType() != BDecoder.BType.string) continue;
            m.put(entry.getKey(), entry.getValue().getString());
        }
        return m;
    }

    /**
     * the map is stored inside a file; this method may return the file
     * @return the file where the map is stored
     */
    public File getFile() {
        return this.table.heapFile;
    }

    /**
     * Return the number of key-value mappings in this map.
     * @return the number of entries mappings in this map
     */
    public int size() {
        return this.table.size();
    }

    /**
     * return true if the table is empty
     */
    public boolean isEmpty() {
        return this.table.size() == 0;
    }

    /**
     * check if a row with given key exists in the table
     * @param pk the primary key
     * @return true if the row exists
     */
    public boolean containsKey(byte[] pk) {
        return this.table.containsKey(pk);
    }

    /**
     * check if a row with given key exists in the table
     * This method is here to implement the Map interface
     * @param key the primary key; anything that is not a byte[] is never contained
     * @return true if the row exists
     */
    public boolean containsKey(Object key) {
        if (key instanceof byte[]) return containsKey((byte[]) key);
        return false;
    }

    /**
     * the containsValue method cannot be used in this class
     * and is only here to implement the Map interface
     */
    public boolean containsValue(Object value) {
        // this method shall not be used because it is not appropriate for this kind of data
        throw new UnsupportedOperationException();
    }

    /**
     * get a map from the table
     * @param pk the primary key
     * @return the map if one found or NULL if no entry exists or the entry is corrupt
     * @throws RowSpaceExceededException
     * @throws IOException
     */
    public Map<String, byte[]> get(byte[] pk) throws IOException, RowSpaceExceededException {
        // b2m handles a null blob by returning null
        return b2m(this.table.get(pk));
    }

    /**
     * get a map from the table
     * this method is here to implement the Map interface;
     * exceptions from the back-end are logged and answered with NULL
     * @param key the primary key, must be a byte[]
     * @return the map if one found or NULL if no entry exists or the entry is corrupt
     */
    public Map<String, byte[]> get(Object key) {
        if (!(key instanceof byte[])) return null;
        try {
            return get((byte[]) key);
        } catch (IOException e) {
            Log.logException(e);
            return null;
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
            return null;
        }
    }

    /**
     * convenience method to get a value from a map
     * @param pk the primary key of the row
     * @param key the property name within the row
     * @return the value or NULL if the row does not exist, the row is
     *         corrupt or the row has no such property
     * @throws IOException
     * @throws RowSpaceExceededException
     */
    public byte[] getProp(byte[] pk, String key) throws IOException, RowSpaceExceededException {
        Map<String, byte[]> map = b2m(this.table.get(pk));
        // b2m returns null for missing and for non-dictionary entries
        if (map == null) return null;
        return map.get(key);
    }

    /**
     * insert a map into the table
     * this method shall be used in place of the put method if the
     * previous entry value is not needed.
     * @param pk the primary key
     * @param map the properties of the row
     * @throws RowSpaceExceededException
     * @throws IOException
     */
    public void insert(byte[] pk, Map<String, byte[]> map) throws RowSpaceExceededException, IOException {
        byte[] b = BEncoder.encode(BEncoder.transcode(map));
        this.table.insert(pk, b);
        this.columnames.addAll(map.keySet());
    }

    /**
     * insert a row that consists of a single property
     * @param pk the primary key
     * @param key the property name
     * @param value the property value
     * @throws IOException
     */
    public void insert(byte[] pk, String key, byte[] value) throws IOException {
        byte[] b = BEncoder.encodeMap(key, value);
        this.table.insert(pk, b);
        this.columnames.add(key);
    }

    /**
     * insert a map into the table
     * exceptions from the back-end are logged and answered with NULL
     * @param pk the primary key
     * @param map the properties of the row
     * @return the previous map for that key, or NULL if there was none
     *         or an exception occurred
     */
    public Map<String, byte[]> put(byte[] pk, Map<String, byte[]> map) {
        try {
            Map<String, byte[]> previous = this.get(pk);
            byte[] b = BEncoder.encode(BEncoder.transcode(map));
            this.table.insert(pk, b);
            this.columnames.addAll(map.keySet());
            return previous;
        } catch (IOException e) {
            Log.logException(e);
            return null;
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
            return null;
        }
    }

    /**
     * delete a map from the table
     * @param pk the primary key
     * @throws IOException
     */
    public void delete(byte[] pk) throws IOException {
        this.table.delete(pk);
    }

    /**
     * delete a map from the table
     * @param key the primary key
     * @return the map that was stored for the key before deletion, may be NULL
     * @throws RowSpaceExceededException
     * @throws IOException
     */
    public Map<String, byte[]> remove(byte[] key) throws IOException, RowSpaceExceededException {
        Map<String, byte[]> value = get(key);
        this.delete(key);
        return value;
    }

    /**
     * delete a map from the table
     * this method is here to implement the Map interface;
     * exceptions from the back-end are logged and answered with NULL
     * @param key the primary key, must be a byte[]
     * @return the removed map or NULL
     */
    public Map<String, byte[]> remove(Object key) {
        if (!(key instanceof byte[])) return null;
        try {
            return remove((byte[]) key);
        } catch (IOException e) {
            Log.logException(e);
            return null;
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
            return null;
        }
    }

    /**
     * Copy all the mappings from the specified map to this map.
     * A failure to store one entry is logged and does not stop the
     * processing of the remaining entries.
     *
     * @param map mappings to be stored in this map
     */
    public void putAll(Map<? extends byte[], ? extends Map<String, byte[]>> map) {
        for (Map.Entry<? extends byte[], ? extends Map<String, byte[]>> me: map.entrySet()) {
            try {
                this.insert(me.getKey(), me.getValue());
            } catch (RowSpaceExceededException e) {
                Log.logException(e);
            } catch (IOException e) {
                Log.logException(e);
            }
        }
    }

    /**
     * remove all entries from the map;
     * possibly removes the backend-file
     */
    public void clear() {
        try {
            this.table.clear();
        } catch (IOException e) {
            Log.logException(e);
        }
    }

    /**
     * close the backend-file.
     * Should be called explicitly to ensure that all data
     * waiting in IO write buffers are flushed
     */
    public void close() {
        this.table.close();
    }

    /**
     * Return a Set of the keys contained in this map.
     * This may not be a useful method, if possible use the keys()
     * method instead to iterate all keys from the backend-file.
     * If reading the keys fails, the exception is logged and the keys
     * collected so far are returned.
     *
     * @return a set of the keys contained in this map
     */
    public Set<byte[]> keySet() {
        TreeSet<byte[]> set = new TreeSet<byte[]>(this.table.ordering);
        try {
            Iterator<byte[]> i = this.table.keys(true, false);
            while (i.hasNext()) set.add(i.next());
        } catch (IOException e) {
            // do not hide the failure; the caller still gets a (partial) set
            Log.logException(e);
        }
        return set;
    }

    /**
     * iterate all keys of the table
     * @return an iterator of byte[]
     * @throws IOException
     */
    public Iterator<byte[]> keys() throws IOException {
        return this.table.keys(true, false);
    }

    /**
     * the values() method is not implemented in this class
     * because it does not make sense to use such a method for
     * file-based data structures. To get a collection view of
     * all the entries, just use a entry iterator instead.
     *
     * @return nothing. The method throws always a UnsupportedOperationException
     */
    public Collection<Map<String, byte[]>> values() {
        // this method shall not be used because it is not appropriate for this kind of data
        throw new UnsupportedOperationException();
    }

    /**
     * The method entrySet() from the Map interface must be implemented,
     * but is never used because that is not useful for this file-based storage class.
     * To prevent the usage, a UnsupportedOperationException is thrown.
     *
     * Instead of using this method, use the iterator() method to iterate
     * all elements in the back-end blob file
     */
    public Set<Map.Entry<byte[], Map<String, byte[]>>> entrySet() {
        throw new UnsupportedOperationException();
    }

    /**
     * iterate all rows of the table.
     * This method implements the
     * Iterable&lt;Map.Entry&lt;byte[], Map&lt;String, byte[]&gt;&gt;&gt;
     * interface.
     * The write buffer is flushed first. If that fails, the table is
     * closed, the iterator is opened on the file and the table is opened again.
     *
     * @return the iterator, or NULL if the back-end file could not be opened
     */
    public Iterator<Map.Entry<byte[], Map<String, byte[]>>> iterator() {
        File location = this.table.location();
        int keylen = this.table.keylength();
        try {
            this.table.flushBuffer();
            return new EntryIter(location, keylen);
        } catch (IOException e1) {
            Log.logException(e1);
            ByteOrder order = this.table.ordering();
            int buffermax = this.table.getBuffermax();
            this.table.close();
            Iterator<Map.Entry<byte[], Map<String, byte[]>>> iter = null;
            try {
                iter = new EntryIter(location, keylen);
            } catch (IOException e) {
                Log.logSevere("PropertiesTable", e.getMessage(), e);
            }
            // always try to re-open the table, even if the iterator could
            // not be created; otherwise this object keeps a closed heap
            try {
                this.table = new Heap(location, keylen, order, buffermax);
            } catch (IOException e) {
                Log.logSevere("PropertiesTable", e.getMessage(), e);
                return null;
            }
            return iter;
        }
    }

    /**
     * iterate all rows of the table. this is a static method that expects that the given
     * file is not opened by any other application
     * @param location the heap file
     * @param keylen length of the keys in that file
     * @return an iterator over all rows of the file
     * @throws IOException
     */
    public static Iterator<Map.Entry<byte[], Map<String, byte[]>>> iterator(File location, int keylen) throws IOException {
        return new EntryIter(location, keylen);
    }

    /**
     * a hashcode for the object; computed from the name of the table
     */
    public int hashCode() {
        return this.table.name().hashCode();
    }

    /**
     * Produce a list of column names from this table
     * This method may be useful if the table shall be displayed
     * as a table in GUIs. To show the first line of the table, the
     * table header, a list of all column names is required. This can
     * be generated with this method.
     * If no column name is known yet, all rows are scanned once.
     * @return a list of column names
     */
    public ArrayList<String> columns() {
        if (this.columnames.isEmpty()) {
            Iterator<Map.Entry<byte[], Map<String, byte[]>>> rows = this.iterator();
            // iterator() answers NULL if the file cannot be read
            while (rows != null && rows.hasNext()) {
                Map<String, byte[]> props = rows.next().getValue();
                // corrupt rows are delivered with a null value
                if (props != null) this.columnames.addAll(props.keySet());
            }
        }
        return new ArrayList<String>(this.columnames);
    }

    /**
     * print all rows of a table to stdout, one row per line
     */
    private static void printAll(BEncodedHeap map) throws IOException {
        Iterator<Map.Entry<byte[], Map<String, byte[]>>> i = map.iterator();
        if (i == null) return;
        while (i.hasNext()) {
            Map.Entry<byte[], Map<String, byte[]>> entry = i.next();
            System.out.println(new String(entry.getKey(), "UTF-8") + ": " + entry.getValue());
        }
    }

    /**
     * Without arguments: self-test that writes three rows into the file
     * "maptest" and prints them. With one argument: print all rows of the
     * given heap file (key length 12).
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            // test the class
            File f = new File(new File("maptest").getAbsolutePath());
            if (f.exists()) FileUtils.deletedelete(f);
            try {
                BEncodedHeap map = new BEncodedHeap(f, 4);
                try {
                    // put some values into the map
                    Map<String, byte[]> m = new HashMap<String, byte[]>();
                    m.put("k", "000".getBytes("UTF-8")); map.insert("123".getBytes("UTF-8"), m);
                    m.put("k", "111".getBytes("UTF-8")); map.insert("456".getBytes("UTF-8"), m);
                    m.put("k", "222".getBytes("UTF-8")); map.insert("789".getBytes("UTF-8"), m);
                    // iterate over keys
                    printAll(map);
                } finally {
                    // clean up, also in case of an exception
                    map.close();
                }
            } catch (IOException e) {
                Log.logException(e);
            } catch (RowSpaceExceededException e) {
                Log.logException(e);
            }
        } else {
            File f = new File(args[0]);
            try {
                BEncodedHeap map = new BEncodedHeap(f, 12);
                try {
                    printAll(map);
                } finally {
                    map.close();
                }
            } catch (IOException e) {
                Log.logException(e);
            }
        }
    }
}