word migration now also works for the new index database

If the new database is switched on, no 'too big' messages appear and
all the WORDS files can be migrated completely.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2553 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-09-12 08:23:47 +00:00
parent a0ddf2ec11
commit b7e7808ea6
2 changed files with 45 additions and 2 deletions

View File

@ -652,6 +652,45 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
}
/**
 * Migrates the word file of one word hash from the old database below
 * <code>oldDatabaseRoot</code> into <code>collections</code>.
 *
 * All entries of the old word file are first read into a fresh container,
 * the container is then handed to <code>collections.addEntries</code>, and
 * only after that call has returned is the old word file removed. A failure
 * while storing therefore leaves the old word file in place, so that the
 * migration of this word can be repeated.
 *
 * @param wordhash the hash of the word whose old word file shall be migrated
 * @return one of
 *         <ul>
 *         <li>the String "not available" if no old word file exists for the hash,</li>
 *         <li>the String "database corrupted; deleted" if reading the old word
 *             file raised a kelondroException; the file is deleted in that case,</li>
 *         <li>the container returned by <code>collections.addEntries</code> if
 *             that is not null (presumably entries that were not taken over;
 *             TODO confirm against addEntries),</li>
 *         <li>otherwise an Integer holding the size that the old word file
 *             reported before it was read.</li>
 *         </ul>
 * @throws IOException if opening or closing the old word file or storing the
 *         entries fails
 */
public Object migrateWords2index(String wordhash) throws IOException {
    File db = plasmaWordIndexFile.wordHash2path(oldDatabaseRoot, wordhash);
    if (!(db.exists())) return "not available";
    plasmaWordIndexFile entity = null;
    try {
        entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
        int size = entity.size();
        indexContainer container = new indexRowSetContainer(wordhash);

        // step 1: read all entries of the old word file into the container.
        // Only this step may classify the old word file as corrupted.
        try {
            Iterator entries = entity.elements(true);
            indexEntry entry;
            while (entries.hasNext()) {
                entry = (indexEntry) entries.next();
                container.add(new indexEntry[] { entry }, System.currentTimeMillis());
            }
        } catch (kelondroException e) {
            // database corrupted, we simply give up the database and delete it
            try { entity.close(); } catch (Exception ee) { }
            entity = null;
            try { db.delete(); } catch (Exception ee) { }
            return "database corrupted; deleted";
        }

        // step 2: store the entries before the old word file is touched.
        // If this throws, the finally block closes the old word file and
        // leaves it on disk; nothing is lost.
        indexContainer feedback = collections.addEntries(container, container.updated(), false);

        // step 3: the entries are stored, now the old word file can go
        try {
            entity.deleteComplete();
        } catch (kelondroException e) {
            // NOTE(review): assumes kelondroException is unchecked - confirm.
            // Regular removal failed; close the file and remove it directly,
            // the same way a corrupted word file is removed above.
            try { entity.close(); } catch (Exception ee) { }
            entity = null;
            try { db.delete(); } catch (Exception ee) { }
        }
        if (entity != null) {
            entity.close();
            entity = null;
        }

        if (feedback != null) return feedback;
        return new Integer(size);
    } finally {
        // best-effort close on every path that left the old word file open
        if (entity != null) try {entity.close();}catch(Exception e){}
    }
}
// The Cleaner class was provided as "UrldbCleaner" by Hydrox
// see http://www.yacy-forum.de/viewtopic.php?p=18093#18093
public Cleaner makeCleaner(plasmaCrawlLURL lurl, String startHash) {

View File

@ -651,7 +651,8 @@ public final class yacy {
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
serverLog log = new serverLog("WORDMIGRATION");
log.logInfo("STARTING MIGRATION");
plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log, sps.getConfigBool("useCollectionIndex", false));
boolean useCollectionIndex = sps.getConfigBool("useCollectionIndex", false);
plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log, useCollectionIndex);
enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
String wordhash;
File wordfile;
@ -661,7 +662,10 @@ public final class yacy {
wordfile = (File) words.nextElement();
wordhash = wordfile.getName().substring(0, 12);
// System.out.println("NOW: " + wordhash);
migrationStatus = wordIndexCache.migrateWords2Assortment(wordhash);
if (useCollectionIndex)
migrationStatus = wordIndexCache.migrateWords2index(wordhash);
else
migrationStatus = wordIndexCache.migrateWords2Assortment(wordhash);
if (migrationStatus instanceof Integer) {
int migrationCount = ((Integer) migrationStatus).intValue();
if (migrationCount == 0)