2005-05-17 10:25:04 +02:00
// plasmaWordIndexCache.java
// -------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
2005-11-04 14:41:51 +01:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-05-17 10:25:04 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma ;
2005-07-06 16:48:41 +02:00
import java.io.File ;
import java.io.IOException ;
import java.util.Iterator ;
import java.util.Map ;
import java.util.TreeMap ;
2006-05-26 11:32:50 +02:00
import de.anomic.index.indexEntry ;
import de.anomic.index.indexRI ;
import de.anomic.index.indexAbstractRI ;
2005-09-26 11:39:54 +02:00
import de.anomic.kelondro.kelondroArray ;
2005-07-06 16:48:41 +02:00
import de.anomic.kelondro.kelondroException ;
import de.anomic.kelondro.kelondroMScoreCluster ;
import de.anomic.kelondro.kelondroRecords ;
2005-06-09 12:34:20 +02:00
import de.anomic.server.logging.serverLog ;
2005-05-17 10:25:04 +02:00
import de.anomic.yacy.yacySeedDB ;
2006-05-26 11:32:50 +02:00
public final class plasmaWordIndexCache extends indexAbstractRI implements indexRI {
2005-11-04 14:41:51 +01:00
2005-05-17 10:25:04 +02:00
// environment constants
2005-07-18 15:32:44 +02:00
private static final String indexArrayFileName = " indexDump1.array " ;
2006-03-28 01:14:04 +02:00
public static final int wCacheReferenceLimit = 64 ;
2006-03-16 23:20:28 +01:00
public static final long wCacheMaxAge = 1000 * 60 * 30 ; // milliseconds; 30 minutes
public static final long kCacheMaxAge = 1000 * 60 * 2 ; // milliseconds; 2 minutes
2006-03-09 11:46:02 +01:00
2005-05-17 10:25:04 +02:00
// class variables
2005-09-26 11:39:54 +02:00
private final File databaseRoot ;
2006-03-13 11:43:12 +01:00
private final TreeMap wCache ; // wordhash-container
private final TreeMap kCache ; // time-container; for karenz/DHT caching (set with high priority)
2005-09-26 11:39:54 +02:00
private final kelondroMScoreCluster hashScore ;
private final kelondroMScoreCluster hashDate ;
2006-03-13 11:43:12 +01:00
private long kCacheInc = 0 ;
2005-10-10 11:28:28 +02:00
private long startTime ;
2006-03-13 11:43:12 +01:00
private int wCacheMaxCount ;
2005-09-26 11:39:54 +02:00
private final serverLog log ;
2005-05-17 10:25:04 +02:00
// calculated constants
2005-12-05 15:24:13 +01:00
private static String maxKey ;
2005-05-17 10:25:04 +02:00
static {
2005-12-05 15:24:13 +01:00
maxKey = " " ; for ( int i = 0 ; i < yacySeedDB . commonHashLength ; i + + ) maxKey + = 'z' ;
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
2005-05-17 10:25:04 +02:00
}
2006-02-14 01:12:07 +01:00
/**
 * Creates a new index cache and immediately tries to reload the dump of the last session.
 * The cache has a back-end where indexes that do not fit in the cache are flushed.
 * A failing restore is logged and leaves the cache with whatever had been loaded so far.
 *
 * @param databaseRoot directory in which the dump file is looked up
 * @param log          logger for status and error messages
 */
public plasmaWordIndexCache(File databaseRoot, serverLog log) {
    // collaborators first: restore() below already needs the log, the maps and startTime
    this.log = log;
    this.databaseRoot = databaseRoot;

    // empty caches and their score books
    this.wCache = new TreeMap();
    this.kCache = new TreeMap();
    this.hashScore = new kelondroMScoreCluster();
    this.hashDate = new kelondroMScoreCluster();

    // counters and limits
    this.kCacheInc = 0;
    this.wCacheMaxCount = 10000;
    this.startTime = System.currentTimeMillis();

    // read in dump of last session
    try {
        restore();
    } catch (IOException restoreFailure) {
        log.logSevere(" unable to restore cache dump: " + restoreFailure.getMessage(), restoreFailure);
    }
}
private void dump ( int waitingSeconds ) throws IOException {
2006-03-13 11:43:12 +01:00
log . logConfig ( " creating dump for index cache, " + wCache . size ( ) + " words (and much more urls) " ) ;
2005-07-18 15:32:44 +02:00
File indexDumpFile = new File ( databaseRoot , indexArrayFileName ) ;
if ( indexDumpFile . exists ( ) ) indexDumpFile . delete ( ) ;
2005-08-30 11:07:42 +02:00
kelondroArray dumpArray = null ;
2005-12-13 00:59:58 +01:00
dumpArray = new kelondroArray ( indexDumpFile , plasmaWordIndexAssortment . bufferStructureBasis , 0 , false ) ;
2005-11-04 14:41:51 +01:00
long startTime = System . currentTimeMillis ( ) ;
long messageTime = System . currentTimeMillis ( ) + 5000 ;
long wordsPerSecond = 0 , wordcount = 0 , urlcount = 0 ;
2006-03-13 11:43:12 +01:00
Map . Entry entry ;
String wordHash ;
plasmaWordIndexEntryContainer container ;
long updateTime ;
2006-05-20 01:19:20 +02:00
plasmaWordIndexEntryInstance wordEntry ;
2006-03-13 11:43:12 +01:00
byte [ ] [ ] row = new byte [ 5 ] [ ] ;
// write kCache, this will be melted with the wCache upon load
synchronized ( kCache ) {
Iterator i = kCache . values ( ) . iterator ( ) ;
while ( i . hasNext ( ) ) {
container = ( plasmaWordIndexEntryContainer ) i . next ( ) ;
// put entries on stack
if ( container ! = null ) {
Iterator ci = container . entries ( ) ;
while ( ci . hasNext ( ) ) {
2006-05-20 01:19:20 +02:00
wordEntry = ( plasmaWordIndexEntryInstance ) ci . next ( ) ;
2006-03-13 11:43:12 +01:00
row [ 0 ] = container . wordHash ( ) . getBytes ( ) ;
row [ 1 ] = kelondroRecords . long2bytes ( container . size ( ) , 4 ) ;
row [ 2 ] = kelondroRecords . long2bytes ( container . updated ( ) , 8 ) ;
row [ 3 ] = wordEntry . getUrlHash ( ) . getBytes ( ) ;
2006-05-20 01:19:20 +02:00
row [ 4 ] = wordEntry . toEncodedStringForm ( ) . getBytes ( ) ;
2006-03-13 11:43:12 +01:00
dumpArray . set ( ( int ) urlcount + + , row ) ;
}
}
wordcount + + ;
i . remove ( ) ; // free some mem
}
}
// write wCache
synchronized ( wCache ) {
Iterator i = wCache . entrySet ( ) . iterator ( ) ;
2005-11-04 14:41:51 +01:00
while ( i . hasNext ( ) ) {
// get entries
entry = ( Map . Entry ) i . next ( ) ;
wordHash = ( String ) entry . getKey ( ) ;
updateTime = getUpdateTime ( wordHash ) ;
container = ( plasmaWordIndexEntryContainer ) entry . getValue ( ) ;
// put entries on stack
if ( container ! = null ) {
Iterator ci = container . entries ( ) ;
while ( ci . hasNext ( ) ) {
2006-05-20 01:19:20 +02:00
wordEntry = ( plasmaWordIndexEntryInstance ) ci . next ( ) ;
2005-11-04 14:41:51 +01:00
row [ 0 ] = wordHash . getBytes ( ) ;
row [ 1 ] = kelondroRecords . long2bytes ( container . size ( ) , 4 ) ;
row [ 2 ] = kelondroRecords . long2bytes ( updateTime , 8 ) ;
row [ 3 ] = wordEntry . getUrlHash ( ) . getBytes ( ) ;
2006-05-20 01:19:20 +02:00
row [ 4 ] = wordEntry . toEncodedStringForm ( ) . getBytes ( ) ;
2005-11-04 14:41:51 +01:00
dumpArray . set ( ( int ) urlcount + + , row ) ;
}
}
wordcount + + ;
i . remove ( ) ; // free some mem
// write a log
if ( System . currentTimeMillis ( ) > messageTime ) {
// System.gc(); // for better statistic
wordsPerSecond = wordcount * 1000 / ( 1 + System . currentTimeMillis ( ) - startTime ) ;
2006-03-13 11:43:12 +01:00
log . logInfo ( " dumping status: " + wordcount + " words done, " + ( wCache . size ( ) / ( wordsPerSecond + 1 ) ) + " seconds remaining, free mem = " + ( Runtime . getRuntime ( ) . freeMemory ( ) / 1024 / 1024 ) + " MB " ) ;
2005-11-04 14:41:51 +01:00
messageTime = System . currentTimeMillis ( ) + 5000 ;
2005-07-18 15:32:44 +02:00
}
}
}
2005-11-04 14:41:51 +01:00
dumpArray . close ( ) ;
dumpArray = null ;
log . logConfig ( " dumped " + urlcount + " word/URL relations in " + ( ( System . currentTimeMillis ( ) - startTime ) / 1000 ) + " seconds " ) ;
2005-07-18 15:32:44 +02:00
}
2005-11-04 14:41:51 +01:00
2005-07-18 15:32:44 +02:00
private long restore ( ) throws IOException {
File indexDumpFile = new File ( databaseRoot , indexArrayFileName ) ;
if ( ! ( indexDumpFile . exists ( ) ) ) return 0 ;
kelondroArray dumpArray = new kelondroArray ( indexDumpFile ) ;
2005-09-13 18:29:59 +02:00
log . logConfig ( " restore array dump of index cache, " + dumpArray . size ( ) + " word/URL relations " ) ;
2005-07-18 15:32:44 +02:00
long startTime = System . currentTimeMillis ( ) ;
long messageTime = System . currentTimeMillis ( ) + 5000 ;
long urlCount = 0 , urlsPerSecond = 0 ;
try {
2006-03-13 11:43:12 +01:00
synchronized ( wCache ) {
2005-07-18 15:32:44 +02:00
int i = dumpArray . size ( ) ;
String wordHash ;
2006-03-11 22:55:32 +01:00
//long creationTime;
2006-05-20 01:19:20 +02:00
plasmaWordIndexEntryInstance wordEntry ;
2005-07-18 15:32:44 +02:00
byte [ ] [ ] row ;
2006-02-14 01:12:07 +01:00
//Runtime rt = Runtime.getRuntime();
2005-07-18 15:32:44 +02:00
while ( i - - > 0 ) {
// get out one entry
row = dumpArray . get ( i ) ;
2005-09-11 05:54:52 +02:00
if ( ( row [ 0 ] = = null ) | | ( row [ 1 ] = = null ) | | ( row [ 2 ] = = null ) | | ( row [ 3 ] = = null ) | | ( row [ 4 ] = = null ) ) continue ;
2006-01-10 17:48:59 +01:00
wordHash = new String ( row [ 0 ] , " UTF-8 " ) ;
2006-03-11 22:55:32 +01:00
//creationTime = kelondroRecords.bytes2long(row[2]);
2006-05-20 01:19:20 +02:00
wordEntry = new plasmaWordIndexEntryInstance ( new String ( row [ 3 ] , " UTF-8 " ) , new String ( row [ 4 ] , " UTF-8 " ) ) ;
2005-07-18 15:32:44 +02:00
// store to cache
2006-03-13 11:43:12 +01:00
addEntry ( wordHash , wordEntry , startTime , false ) ;
2005-07-18 15:32:44 +02:00
urlCount + + ;
// protect against memory shortage
2006-02-14 01:12:07 +01:00
//while (rt.freeMemory() < 1000000) {flushFromMem(); java.lang.System.gc();}
2005-07-18 15:32:44 +02:00
// write a log
if ( System . currentTimeMillis ( ) > messageTime ) {
System . gc ( ) ; // for better statistic
urlsPerSecond = 1 + urlCount * 1000 / ( 1 + System . currentTimeMillis ( ) - startTime ) ;
log . logInfo ( " restoring status: " + urlCount + " urls done, " + ( i / urlsPerSecond ) + " seconds remaining, free mem = " + ( Runtime . getRuntime ( ) . freeMemory ( ) / 1024 / 1024 ) + " MB " ) ;
messageTime = System . currentTimeMillis ( ) + 5000 ;
}
}
}
2005-11-04 14:41:51 +01:00
2005-07-18 15:32:44 +02:00
dumpArray . close ( ) ;
2006-03-13 11:43:12 +01:00
log . logConfig ( " restored " + wCache . size ( ) + " words in " + ( ( System . currentTimeMillis ( ) - startTime ) / 1000 ) + " seconds " ) ;
2005-07-18 15:32:44 +02:00
} catch ( kelondroException e ) {
// restore failed
2005-08-30 23:32:59 +02:00
log . logSevere ( " restore of indexCache array dump failed: " + e . getMessage ( ) , e ) ;
2005-08-30 11:07:42 +02:00
} finally {
if ( dumpArray ! = null ) try { dumpArray . close ( ) ; } catch ( Exception e ) { }
2005-05-17 10:25:04 +02:00
}
return urlCount ;
}
2005-11-04 14:41:51 +01:00
2005-05-17 10:25:04 +02:00
// cache settings
2005-11-04 14:41:51 +01:00
2006-03-13 11:43:12 +01:00
public int maxURLinWCache ( ) {
2006-03-10 14:57:30 +01:00
if ( hashScore . size ( ) = = 0 ) return 0 ;
2006-02-25 22:05:19 +01:00
return hashScore . getMaxScore ( ) ;
}
2006-03-13 11:43:12 +01:00
public long minAgeOfWCache ( ) {
2006-03-10 14:57:30 +01:00
if ( hashDate . size ( ) = = 0 ) return 0 ;
2006-03-09 12:31:17 +01:00
return System . currentTimeMillis ( ) - longEmit ( hashDate . getMaxScore ( ) ) ;
}
2006-03-13 11:43:12 +01:00
public long maxAgeOfWCache ( ) {
2006-03-10 14:57:30 +01:00
if ( hashDate . size ( ) = = 0 ) return 0 ;
2006-02-25 22:05:19 +01:00
return System . currentTimeMillis ( ) - longEmit ( hashDate . getMinScore ( ) ) ;
2005-05-17 10:25:04 +02:00
}
2006-03-13 11:43:12 +01:00
public long minAgeOfKCache ( ) {
if ( kCache . size ( ) = = 0 ) return 0 ;
return System . currentTimeMillis ( ) - ( ( Long ) kCache . lastKey ( ) ) . longValue ( ) ;
2005-05-17 10:25:04 +02:00
}
2005-11-04 14:41:51 +01:00
2006-03-13 11:43:12 +01:00
public long maxAgeOfKCache ( ) {
if ( kCache . size ( ) = = 0 ) return 0 ;
return System . currentTimeMillis ( ) - ( ( Long ) kCache . firstKey ( ) ) . longValue ( ) ;
2005-09-20 12:10:34 +02:00
}
2005-11-04 14:41:51 +01:00
2006-03-13 11:43:12 +01:00
/**
 * Stores a new value for the configured word limit.
 *
 * @param maxWords the limit to remember
 */
public void setMaxWordCount(int maxWords) {
    wCacheMaxCount = maxWords;
}
public int getMaxWordCount ( ) {
return this . wCacheMaxCount ;
2005-09-20 12:10:34 +02:00
}
2006-02-14 01:12:07 +01:00
2006-03-13 11:43:12 +01:00
/**
 * Counts the words held in the wCache.
 *
 * @return number of mappings in the wCache
 */
public int wSize() {
    final int words = wCache.size();
    return words;
}
public int kSize ( ) {
return kCache . size ( ) ;
2005-05-17 10:25:04 +02:00
}
2005-11-04 14:41:51 +01:00
2006-03-15 17:01:42 +01:00
/**
 * Counts the mappings of both caches together.
 *
 * @return size of the wCache plus size of the kCache
 */
public int size() {
    final int words = wCache.size();
    final int timedContainers = kCache.size();
    return words + timedContainers;
}
2006-01-30 13:42:06 +01:00
public int indexSize ( String wordHash ) {
int size = 0 ;
2006-03-13 11:43:12 +01:00
plasmaWordIndexEntryContainer cacheIndex = ( plasmaWordIndexEntryContainer ) wCache . get ( wordHash ) ;
2006-01-30 13:42:06 +01:00
if ( cacheIndex ! = null ) size + = cacheIndex . size ( ) ;
return size ;
}
2006-02-14 01:12:07 +01:00
public Iterator wordHashes ( String startWordHash , boolean rot ) {
if ( rot ) throw new UnsupportedOperationException ( " plasmaWordIndexCache cannot rotate " ) ;
2006-03-13 11:43:12 +01:00
return wCache . tailMap ( startWordHash ) . keySet ( ) . iterator ( ) ;
2006-01-14 00:59:04 +01:00
}
2006-03-13 11:43:12 +01:00
/**
 * Moves every kCache container whose key is at least kCacheMaxAge milliseconds old
 * into the wCache. Containers are taken in ascending key order and the scan stops
 * at the first container that is still young enough.
 */
public void shiftK2W() {
    synchronized (kCache) {
        while (!kCache.isEmpty()) {
            Long oldestKey = (Long) kCache.firstKey();
            long age = System.currentTimeMillis() - oldestKey.longValue();
            if (age < kCacheMaxAge) {
                // all remaining keys are younger still
                break;
            }
            plasmaWordIndexEntryContainer expired = (plasmaWordIndexEntryContainer) kCache.remove(oldestKey);
            addEntries(expired, expired.updated(), false);
        }
    }
}
2006-02-14 01:12:07 +01:00
public String bestFlushWordHash ( ) {
2005-06-01 16:24:25 +02:00
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
// - the entry with maximum count
2006-03-13 11:43:12 +01:00
shiftK2W ( ) ;
if ( wCache . size ( ) = = 0 ) return null ;
2005-06-01 16:24:25 +02:00
try {
2006-03-13 11:43:12 +01:00
synchronized ( wCache ) {
2006-02-25 22:05:19 +01:00
String hash = null ;
2005-12-13 17:00:20 +01:00
int count = hashScore . getMaxScore ( ) ;
2006-03-13 11:43:12 +01:00
if ( ( count > wCacheReferenceLimit ) & &
2006-03-14 00:06:06 +01:00
( ( hash = ( String ) hashScore . getMaxObject ( ) ) ! = null ) ) {
// we MUST flush high-score entries, because a loop deletes entries in cache until this condition fails
// in this cache we MUST NOT check wCacheMinAge
2006-02-14 01:12:07 +01:00
return hash ;
2006-02-25 22:05:19 +01:00
}
long oldestTime = longEmit ( hashDate . getMinScore ( ) ) ;
2006-03-13 11:43:12 +01:00
if ( ( ( System . currentTimeMillis ( ) - oldestTime ) > wCacheMaxAge ) & &
2006-02-25 22:05:19 +01:00
( ( hash = ( String ) hashDate . getMinObject ( ) ) ! = null ) ) {
// flush out-dated entries
2006-02-14 01:12:07 +01:00
return hash ;
2005-12-13 17:00:20 +01:00
}
2006-03-14 00:06:06 +01:00
// cases with respect to memory situation
if ( Runtime . getRuntime ( ) . freeMemory ( ) < 1000000 ) {
// urgent low-memory case
2006-02-25 22:05:19 +01:00
hash = ( String ) hashScore . getMaxObject ( ) ; // flush high-score entries (saves RAM)
} else {
2006-03-14 00:06:06 +01:00
// not-efficient-so-far case. cleans up unnecessary cache slots
hash = ( String ) hashDate . getMinObject ( ) ; // flush oldest entries
2006-02-25 22:05:19 +01:00
}
return hash ;
2005-06-01 16:24:25 +02:00
}
} catch ( Exception e ) {
2005-08-30 23:32:59 +02:00
log . logSevere ( " flushFromMem: " + e . getMessage ( ) , e ) ;
2005-06-01 16:24:25 +02:00
}
2006-02-14 01:12:07 +01:00
return null ;
2005-07-17 23:22:18 +02:00
}
2005-11-04 14:41:51 +01:00
2005-05-31 19:39:14 +02:00
private int intTime ( long longTime ) {
2006-03-13 11:43:12 +01:00
return ( int ) Math . max ( 0 , ( ( longTime - startTime ) / 1000 ) ) ;
2005-05-31 19:39:14 +02:00
}
2005-11-04 14:41:51 +01:00
2006-02-25 22:05:19 +01:00
/**
 * Converts seconds since startTime back into an absolute time; the inverse of intTime
 * up to the second resolution.
 *
 * @param intTime seconds relative to startTime
 * @return absolute time in milliseconds
 */
private long longEmit(int intTime) {
    long offsetMillis = 1000L * intTime;
    return startTime + offsetMillis;
}
2006-05-26 11:32:50 +02:00
public plasmaWordIndexEntryContainer getContainer ( String wordHash , boolean deleteIfEmpty , long maxtime_dummy ) {
2006-03-13 11:43:12 +01:00
return ( plasmaWordIndexEntryContainer ) wCache . get ( wordHash ) ;
2005-05-17 10:25:04 +02:00
}
2005-11-04 14:41:51 +01:00
2006-02-14 01:12:07 +01:00
public plasmaWordIndexEntryContainer deleteContainer ( String wordHash ) {
// returns the index that had been deleted
2006-03-13 11:43:12 +01:00
synchronized ( wCache ) {
plasmaWordIndexEntryContainer container = ( plasmaWordIndexEntryContainer ) wCache . remove ( wordHash ) ;
2005-05-17 10:25:04 +02:00
hashScore . deleteScore ( wordHash ) ;
2005-05-31 19:39:14 +02:00
hashDate . deleteScore ( wordHash ) ;
2006-02-14 01:12:07 +01:00
return container ;
2005-05-17 10:25:04 +02:00
}
}
2006-02-14 01:12:07 +01:00
public int removeEntries ( String wordHash , String [ ] urlHashes , boolean deleteComplete ) {
if ( urlHashes . length = = 0 ) return 0 ;
int count = 0 ;
2006-03-13 11:43:12 +01:00
synchronized ( wCache ) {
2006-02-14 01:12:07 +01:00
plasmaWordIndexEntryContainer c = ( plasmaWordIndexEntryContainer ) deleteContainer ( wordHash ) ;
if ( c ! = null ) {
2006-02-14 14:06:32 +01:00
count = c . removeEntries ( wordHash , urlHashes , deleteComplete ) ;
2006-02-14 01:12:07 +01:00
if ( c . size ( ) ! = 0 ) this . addEntries ( c , System . currentTimeMillis ( ) , false ) ;
}
}
return count ;
2005-05-17 10:25:04 +02:00
}
2006-03-09 11:46:02 +01:00
2006-03-13 11:43:12 +01:00
public int tryRemoveURLs ( String urlHash ) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries
int delCount = 0 ;
synchronized ( kCache ) {
Iterator i = kCache . entrySet ( ) . iterator ( ) ;
Map . Entry entry ;
Long l ;
plasmaWordIndexEntryContainer c ;
while ( i . hasNext ( ) ) {
entry = ( Map . Entry ) i . next ( ) ;
l = ( Long ) entry . getKey ( ) ;
// get container
c = ( plasmaWordIndexEntryContainer ) entry . getValue ( ) ;
if ( c . remove ( urlHash ) ! = null ) {
if ( c . size ( ) = = 0 ) {
i . remove ( ) ;
} else {
kCache . put ( l , c ) ; // superfluous?
}
delCount + + ;
}
}
}
return delCount ;
}
public int addEntries ( plasmaWordIndexEntryContainer container , long updateTime , boolean dhtCase ) {
2005-07-20 02:39:06 +02:00
// this puts the entries into the cache, not into the assortment directly
2005-10-11 09:06:33 +02:00
int added = 0 ;
2005-11-04 14:41:51 +01:00
2005-10-09 06:43:07 +02:00
// put new words into cache
2006-03-13 11:43:12 +01:00
if ( dhtCase ) synchronized ( kCache ) {
// put container into kCache
kCache . put ( new Long ( updateTime + kCacheInc ) , container ) ;
kCacheInc + + ;
if ( kCacheInc > 10000 ) kCacheInc = 0 ;
added = container . size ( ) ;
} else synchronized ( wCache ) {
// put container into wCache
String wordHash = container . wordHash ( ) ;
plasmaWordIndexEntryContainer entries = ( plasmaWordIndexEntryContainer ) wCache . get ( wordHash ) ; // null pointer exception? wordhash != null! must be cache==null
2005-10-09 06:43:07 +02:00
if ( entries = = null ) entries = new plasmaWordIndexEntryContainer ( wordHash ) ;
2006-04-02 22:40:07 +02:00
added = entries . add ( container , - 1 ) ;
2005-05-17 10:25:04 +02:00
if ( added > 0 ) {
2006-03-13 11:43:12 +01:00
wCache . put ( wordHash , entries ) ;
2005-05-17 10:25:04 +02:00
hashScore . addScore ( wordHash , added ) ;
2005-05-31 19:39:14 +02:00
hashDate . setScore ( wordHash , intTime ( updateTime ) ) ;
2005-05-17 10:25:04 +02:00
}
2006-03-13 11:43:12 +01:00
entries = null ;
2005-10-09 06:43:07 +02:00
}
2005-05-17 10:25:04 +02:00
return added ;
}
2006-05-26 11:32:50 +02:00
public boolean addEntry ( String wordHash , indexEntry newEntry , long updateTime , boolean dhtCase ) {
2006-03-13 11:43:12 +01:00
if ( dhtCase ) synchronized ( kCache ) {
// put container into kCache
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer ( wordHash ) ;
container . add ( newEntry ) ;
kCache . put ( new Long ( updateTime + kCacheInc ) , container ) ;
kCacheInc + + ;
if ( kCacheInc > 10000 ) kCacheInc = 0 ;
return true ;
} else synchronized ( wCache ) {
plasmaWordIndexEntryContainer container = ( plasmaWordIndexEntryContainer ) wCache . get ( wordHash ) ;
2005-12-13 17:00:20 +01:00
if ( container = = null ) container = new plasmaWordIndexEntryContainer ( wordHash ) ;
2006-05-26 11:32:50 +02:00
indexEntry [ ] entries = new indexEntry [ ] { newEntry } ;
2005-12-13 17:00:20 +01:00
if ( container . add ( entries , updateTime ) > 0 ) {
2006-03-13 11:43:12 +01:00
wCache . put ( wordHash , container ) ;
2005-12-13 17:00:20 +01:00
hashScore . incScore ( wordHash ) ;
hashDate . setScore ( wordHash , intTime ( updateTime ) ) ;
2006-02-25 17:10:31 +01:00
return true ;
2005-12-13 17:00:20 +01:00
}
2006-03-13 11:43:12 +01:00
container = null ;
entries = null ;
return false ;
2005-05-17 10:25:04 +02:00
}
}
public void close ( int waitingSeconds ) {
2006-01-23 14:45:14 +01:00
// dump cache
2005-05-17 10:25:04 +02:00
try {
dump ( waitingSeconds ) ;
} catch ( IOException e ) {
2005-08-30 23:32:59 +02:00
log . logSevere ( " unable to dump cache: " + e . getMessage ( ) , e ) ;
2005-05-17 10:25:04 +02:00
}
2005-07-20 02:39:06 +02:00
}
2006-01-14 00:59:04 +01:00
}