// IndexCollectionMigration.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.03.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2009-03-13 11:34:51 +0100 (Fr, 13 Mrz 2009) $
// $LastChangedRevision: 5709 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package de.anomic.kelondro.text;

import java.io.File;
import java.io.IOException;
import java.util.Set;

import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MergeIterator;
import de.anomic.kelondro.order.Order;
import de.anomic.kelondro.order.RotateIterator;
import de.anomic.kelondro.text.Index;
import de.anomic.kelondro.text.IndexCollection;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.ReferenceContainerOrder;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.Log;

/**
 * Index facade that stores into an {@link IndexCell} ("RICELL") while an
 * optional legacy {@link IndexCollection} ("RICOLLECTION") is still present.
 *
 * Whenever a term is touched through this class, its container is deleted
 * from the legacy collection (if one is open) and added to the cell, so the
 * legacy store is drained term by term. Once the legacy store is found empty
 * at construction time its directory is removed and only the cell is used.
 *
 * NOTE(review): the generic type arguments in this file were reconstructed
 * from usage; confirm them against the declarations of the referenced types.
 *
 * @param <ReferenceType> the reference entry type held in the containers
 */
public final class IndexCollectionMigration<ReferenceType extends Reference> extends AbstractBufferedIndex<ReferenceType> implements Index<ReferenceType>, BufferedIndex<ReferenceType> {

    private final IndexCell<ReferenceType> cell;
    private IndexCollection<ReferenceType> collections; // null when there is nothing (left) to migrate
    private final IODispatcher<ReferenceType> merger;

    /**
     * Opens the cell below <code>indexPrimaryTextLocation/RICELL</code>, moves
     * the blobs of an existing <code>RICACHE</code> directory into the cell and
     * opens <code>RICOLLECTION</code> for migration if that directory exists.
     *
     * @param indexPrimaryTextLocation parent directory of RICELL, RICACHE and RICOLLECTION
     * @param factory                  reference factory, passed to the super class, the cell and the collection
     * @param wordOrdering             ordering handed to the cell
     * @param payloadrow               not used by this constructor
     * @param entityCacheMaxSize       handed to the cell
     * @param targetFileSize           handed to the cell
     * @param maxFileSize              handed to the cell
     * @param merger                   dispatcher handed to the cell
     * @param writeBufferSize          handed to the cell
     * @param log                      not used by this constructor
     * @throws IOException if opening the cell or the collection fails
     */
    public IndexCollectionMigration (
            final File indexPrimaryTextLocation,
            final ReferenceFactory<ReferenceType> factory,
            final ByteOrder wordOrdering,
            final Row payloadrow,
            final int entityCacheMaxSize,
            final long targetFileSize,
            final long maxFileSize,
            final IODispatcher<ReferenceType> merger,
            final int writeBufferSize,
            final Log log) throws IOException {
        super(factory);

        this.merger = merger;
        final File celldir = new File(indexPrimaryTextLocation, "RICELL");
        this.cell = new IndexCell<ReferenceType>(
                                celldir,
                                factory,
                                wordOrdering,
                                WordReferenceRow.urlEntryRow,
                                entityCacheMaxSize,
                                targetFileSize,
                                maxFileSize,
                                this.merger,
                                writeBufferSize);

        final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
        if (textindexcache.exists()) {
            // migrate the "index.dhtout.blob" and "index.dhtin.blob" into the RICELL directory
            final boolean outMoved = migrateCacheBlob(textindexcache, "index.dhtout.blob");
            final boolean inMoved = migrateCacheBlob(textindexcache, "index.dhtin.blob");
            // delete everything else, but only if no blob was left behind:
            // a blob that could not be moved must not be destroyed by the clean-up
            if (outMoved && inMoved) {
                deleteDirectory(textindexcache);
            }
        }

        // open collections, this is for migration only.
        final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION");
        if (textindexcollections.exists()) {
            this.collections = new IndexCollection<ReferenceType>(
                        textindexcollections,
                        "collection",
                        factory,
                        12,
                        Base64Order.enhancedCoder,
                        7,
                        WordReferenceRow.urlEntryRow,
                        false);
            if (this.collections.size() == 0) {
                // nothing left to migrate: delete everything here
                this.collections.close();
                this.collections = null;
                deleteDirectory(textindexcollections);
            }
        } else {
            this.collections = null;
        }
    }

    /**
     * Moves one blob file from the old cache directory into the cell and mounts it there.
     *
     * @param cacheDir the old cache directory
     * @param blobName file name of the blob inside <code>cacheDir</code>
     * @return true if the blob does not exist or was moved and mounted;
     *         false if the rename failed and the blob is still in <code>cacheDir</code>
     * @throws IOException propagated from the cell, if it raises one
     */
    private boolean migrateCacheBlob(final File cacheDir, final String blobName) throws IOException {
        final File source = new File(cacheDir, blobName);
        if (!source.exists()) return true;
        final File target = this.cell.newContainerBLOBFile();
        // File.renameTo reports failure through its return value only
        if (!source.renameTo(target)) return false;
        this.cell.mountBLOBFile(target);
        return true;
    }

    /**
     * Deletes all entries listed in a directory and then the directory itself.
     *
     * @param dir the directory to remove
     * @throws IOException propagated from the deletion helper, if it raises one
     */
    private static void deleteDirectory(final File dir) throws IOException {
        final String[] names = dir.list(); // null if dir is not a directory or cannot be read
        if (names != null) {
            for (final String name : names) {
                FileUtils.deletedelete(new File(dir, name));
            }
        }
        FileUtils.deletedelete(dir);
    }

    /**
     * Moves the container of one term from the legacy collection into the cell.
     *
     * @param wordHash the term hash
     * @return true if a legacy container existed and was added to the cell
     * @throws IOException if adding to the cell fails; the container has
     *         already been deleted from the legacy collection at that point
     */
    private boolean migrateTerm(final byte[] wordHash) throws IOException {
        if (this.collections == null) return false;
        final ReferenceContainer<ReferenceType> legacy = this.collections.delete(wordHash);
        if (legacy == null) return false;
        this.cell.add(legacy);
        return true;
    }

    /* methods for interface Index */

    /**
     * Adds a container to the cell. If the legacy collection still holds a
     * container for the same term, that container is removed there, merged
     * with <code>entries</code> and the merge result is stored instead.
     *
     * @param entries the container to add
     * @throws IOException if writing to the cell fails
     */
    public void add(final ReferenceContainer<ReferenceType> entries) throws IOException {
        assert (entries.row().objectsize == WordReferenceRow.urlEntryRow.objectsize);

        if (this.collections != null) {
            final ReferenceContainer<ReferenceType> legacy = this.collections.delete(entries.getTermHash());
            if (legacy != null) {
                // store the value returned by merge(), as delete() does;
                // dropping it could lose the new entries
                this.cell.add(legacy.merge(entries));
                return;
            }
        }
        this.cell.add(entries);
    }

    /**
     * Adds a single reference for a term. A legacy container for that term is
     * removed from the collection, extended by <code>entry</code> and stored
     * in the cell.
     *
     * @param wordHash the term hash
     * @param entry    the reference to add
     * @throws IOException if writing to the cell fails
     */
    public void add(final byte[] wordHash, final ReferenceType entry) throws IOException {
        if (this.collections != null) {
            final ReferenceContainer<ReferenceType> legacy = this.collections.delete(wordHash);
            if (legacy != null) {
                legacy.add(entry);
                this.cell.add(legacy);
                return;
            }
        }
        this.cell.add(wordHash, entry);
    }

    /**
     * Tells whether the term is known. As a side effect a legacy container for
     * the term is moved into the cell.
     *
     * @param wordHash the term hash
     * @return true if the legacy collection held the term, otherwise the answer of the cell
     */
    public boolean has(final byte[] wordHash) {
        try {
            if (migrateTerm(wordHash)) return true;
        } catch (IOException e) {
            // the interface method cannot throw; the term did exist in the
            // legacy collection, so it is still reported as present
            e.printStackTrace();
            return true;
        }
        return this.cell.has(wordHash);
    }

    /**
     * Counts the references of a term in the cell, after moving a legacy
     * container for that term into the cell.
     *
     * @param wordHash the term hash
     * @return the count reported by the cell
     */
    public int count(byte[] wordHash) {
        try {
            migrateTerm(wordHash);
        } catch (IOException e) {
            // the interface method cannot throw; fall through to the cell count
            e.printStackTrace();
        }
        return this.cell.count(wordHash);
    }

    /**
     * Reads the container of a term from the cell, after moving a legacy
     * container for that term into the cell.
     *
     * @param wordHash     the term hash; null is treated as wrong input
     * @param urlselection passed through to the cell
     * @return the container delivered by the cell, or null if <code>wordHash</code> is null
     * @throws IOException if migration or reading fails
     */
    public ReferenceContainer<ReferenceType> get(final byte[] wordHash, final Set<String> urlselection) throws IOException {
        if (wordHash == null) {
            // wrong input
            return null;
        }
        migrateTerm(wordHash);
        return this.cell.get(wordHash, urlselection);
    }

    /**
     * Deletes a term from the cell and from the legacy collection.
     *
     * @param wordHash the term hash
     * @return the deleted container; the merge of both if both stores held one;
     *         null if neither did
     * @throws IOException if deleting from the cell fails
     */
    public ReferenceContainer<ReferenceType> delete(final byte[] wordHash) throws IOException {
        final ReferenceContainer<ReferenceType> fromCell = this.cell.delete(wordHash);
        if (this.collections == null) return fromCell;
        final ReferenceContainer<ReferenceType> fromLegacy = this.collections.delete(wordHash);
        if (fromCell == null) return fromLegacy;
        if (fromLegacy == null) return fromCell;
        return fromCell.merge(fromLegacy);
    }

    /**
     * Removes one url reference from a term, after moving a legacy container
     * for that term into the cell.
     *
     * @param wordHash the term hash
     * @param urlHash  the url hash to remove
     * @return the result of the removal in the cell
     * @throws IOException if migration or removal fails
     */
    public boolean remove(final byte[] wordHash, final String urlHash) throws IOException {
        migrateTerm(wordHash);
        return this.cell.remove(wordHash, urlHash);
    }

    /**
     * Removes several url references from a term, after moving a legacy
     * container for that term into the cell.
     *
     * @param wordHash  the term hash
     * @param urlHashes the url hashes to remove
     * @return the result of the removal in the cell
     * @throws IOException if migration or removal fails
     */
    public int remove(final byte[] wordHash, final Set<String> urlHashes) throws IOException {
        migrateTerm(wordHash);
        return this.cell.remove(wordHash, urlHashes);
    }

    /**
     * Iterates over the containers starting at a term hash.
     *
     * @param startHash the term hash to start at
     * @param rot       if true, the iterator is wrapped into a {@link RotateIterator}
     * @param ram       if true, only the cell is asked (see {@link #wordContainers})
     * @return the container iterator
     * @throws IOException if an iterator cannot be created
     */
    public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startHash, final boolean rot, final boolean ram) throws IOException {
        final CloneableIterator<ReferenceContainer<ReferenceType>> i = wordContainers(startHash, ram);
        if (rot) {
            // collections is null once migration is complete; it then adds nothing to the size
            final int legacySize = (ram || this.collections == null) ? 0 : this.collections.size();
            return new RotateIterator<ReferenceContainer<ReferenceType>>(i, Base64Order.zero(startHash.length), this.cell.size() + legacySize);
        }
        return i;
    }

    /**
     * Creates the container iterator: from the cell alone if <code>ram</code>
     * is set or no legacy collection is open, otherwise a merge of the cell
     * iterator and the legacy collection iterator.
     *
     * @param startWordHash the term hash to start at
     * @param ram           selects the cell-only iteration
     * @return the container iterator
     * @throws IOException if an iterator cannot be created
     */
    private synchronized CloneableIterator<ReferenceContainer<ReferenceType>> wordContainers(final byte[] startWordHash, final boolean ram) throws IOException {
        if (ram) {
            return this.cell.references(startWordHash, true);
        }
        if (this.collections == null) return this.cell.references(startWordHash, false);

        // the rotated order is needed for the merge only
        final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.cell.ordering().clone());
        final ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startWordHash, 0);
        containerOrder.rotate(emptyContainer);
        return new MergeIterator<ReferenceContainer<ReferenceType>>(
                this.cell.references(startWordHash, false),
                this.collections.references(startWordHash, false),
                containerOrder,
                ReferenceContainer.containerMergeMethod,
                true);
    }

    /**
     * Clears the cell and, if open, the legacy collection. IOExceptions are
     * printed and not propagated.
     */
    public void clear() {
        try {
            this.cell.clear();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        if (this.collections != null) {
            try {
                this.collections.clear();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Closes the cell and, if open, the legacy collection.
     */
    public void close() {
        this.cell.close();
        if (this.collections != null) this.collections.close();
    }

    /**
     * @return the cell size, or the larger of cell size and legacy collection
     *         size while a legacy collection is open
     */
    public int size() {
        return (this.collections == null) ? this.cell.size() : Math.max(this.collections.size(), this.cell.size());
    }

    /**
     * @return 1 MB indexing overhead plus the minimum memory of the cell and,
     *         if open, of the legacy collection
     */
    public int minMem() {
        return 1024*1024 /* indexing overhead */ + this.cell.minMem() + ((this.collections == null) ? 0 : this.collections.minMem());
    }

    /*
     * methods for cache management
     */

    public int getBufferMaxReferences() {
        return this.cell.getBufferMaxReferences();
    }

    public long getBufferMinAge() {
        return this.cell.getBufferMinAge();
    }

    public long getBufferMaxAge() {
        return this.cell.getBufferMaxAge();
    }

    public long getBufferSizeBytes() {
        return this.cell.getBufferSizeBytes();
    }

    public void setBufferMaxWordCount(final int maxWords) {
        this.cell.setBufferMaxWordCount(maxWords);
    }

    /**
     * @return the backend size of the cell, or the size of the legacy
     *         collection while one is open
     */
    public int getBackendSize() {
        // NOTE(review): the cell backend is not counted while a legacy collection is open - confirm intended
        return (this.collections == null) ? this.cell.getBackendSize() : this.collections.size();
    }

    public int getBufferSize() {
        return this.cell.getBufferSize();
    }

    public ByteOrder ordering() {
        return this.cell.ordering();
    }

    /**
     * Iterates over the containers starting at a term hash.
     *
     * @param startWordHash the term hash to start at
     * @param rot           passed to the cell if no legacy collection is open
     * @return the cell iterator, or a merge of cell and legacy collection iterators
     */
    public CloneableIterator<ReferenceContainer<ReferenceType>> references(byte[] startWordHash, boolean rot) {
        if (this.collections == null) return this.cell.references(startWordHash, rot);
        // NOTE(review): rot is not applied to the merged iterator - confirm intended
        final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.cell.ordering().clone());
        return new MergeIterator<ReferenceContainer<ReferenceType>>(
                this.cell.references(startWordHash, false),
                this.collections.references(startWordHash, false),
                containerOrder,
                ReferenceContainer.containerMergeMethod,
                true);
    }

    public void cleanupBuffer(int time) {
        this.cell.cleanupBuffer(time);
    }

}