2011-03-08 02:51:51 +01:00
// IODispatcher.java
2009-03-20 16:44:59 +01:00
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 20.03.2009 on http://yacy.net
//
2011-03-08 02:51:51 +01:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2009-03-20 16:44:59 +01:00
//
// LICENSE
2011-06-30 17:37:14 +02:00
//
2009-03-20 16:44:59 +01:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-10-10 02:39:15 +02:00
package net.yacy.kelondro.rwi ;
2009-03-20 16:44:59 +01:00
import java.io.File ;
import java.io.IOException ;
import java.util.concurrent.ArrayBlockingQueue ;
2009-04-28 09:24:28 +02:00
import java.util.concurrent.Semaphore ;
2009-03-20 16:44:59 +01:00
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2009-10-10 02:43:25 +02:00
import net.yacy.kelondro.blob.ArrayStack ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.MemoryControl ;
2009-10-10 01:13:30 +02:00
2009-03-20 16:44:59 +01:00
/ * *
* this is a concurrent merger that can merge single files that are queued for merging .
* when several ReferenceContainerArray classes host their ReferenceContainer file arrays ,
* they may share a single ReferenceContainerMerger object which does the sharing for all
* of them . This is the best way to do the merging , because it does heavy IO access and
* such access should not be performed concurrently , but queued . This class is the
* manaagement class for queueing of merge jobs .
*
* to use this class , first instantiate a object and then start the concurrent execution
* of merging with a call to the start ( ) - method . To shut down all mergings , call terminate ( )
* only once .
* /
2009-05-29 12:03:35 +02:00
public class IODispatcher extends Thread {
2009-03-20 16:44:59 +01:00
2013-07-09 14:28:25 +02:00
private static final ConcurrentLog log = new ConcurrentLog ( " IODispatcher " ) ;
2012-07-13 21:15:38 +02:00
2009-06-30 15:25:46 +02:00
private Semaphore controlQueue ;
2010-01-11 00:09:48 +01:00
private final Semaphore termination ;
2009-06-30 15:25:46 +02:00
private ArrayBlockingQueue < MergeJob > mergeQueue ;
private ArrayBlockingQueue < DumpJob < ? extends Reference > > dumpQueue ;
2009-05-29 12:03:35 +02:00
//private ReferenceFactory<ReferenceType> factory;
2009-06-30 15:25:46 +02:00
private boolean terminate ;
2011-06-30 17:37:14 +02:00
private final int writeBufferSize ;
public IODispatcher ( final int dumpQueueLength , final int mergeQueueLength , final int writeBufferSize ) {
2009-04-28 09:24:28 +02:00
this . termination = new Semaphore ( 0 ) ;
this . controlQueue = new Semaphore ( 0 ) ;
2009-05-29 12:03:35 +02:00
this . dumpQueue = new ArrayBlockingQueue < DumpJob < ? extends Reference > > ( dumpQueueLength ) ;
2009-03-30 08:22:27 +02:00
this . mergeQueue = new ArrayBlockingQueue < MergeJob > ( mergeQueueLength ) ;
2009-05-04 10:29:44 +02:00
this . writeBufferSize = writeBufferSize ;
2009-04-28 09:24:28 +02:00
this . terminate = false ;
2014-02-28 15:02:01 +01:00
this . setName ( " IODispatcher " ) ;
2009-03-20 16:44:59 +01:00
}
2011-06-30 17:37:14 +02:00
2010-04-18 23:55:20 +02:00
public void terminate ( ) {
2015-03-11 19:36:23 +01:00
this . terminate = true ; // asure current run() loop will termiate
2011-06-30 17:37:14 +02:00
if ( this . termination ! = null & & this . controlQueue ! = null & & isAlive ( ) ) {
2009-04-28 09:24:28 +02:00
this . controlQueue . release ( ) ;
2009-03-30 08:22:27 +02:00
// await termination
try {
2011-06-30 17:37:14 +02:00
this . termination . acquire ( ) ;
} catch ( final InterruptedException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2009-03-30 08:22:27 +02:00
}
2009-03-20 16:44:59 +01:00
}
2009-03-30 08:22:27 +02:00
}
2010-06-26 12:32:47 +02:00
2011-03-14 22:07:15 +01:00
@SuppressWarnings ( " unchecked " )
2011-06-30 17:37:14 +02:00
protected synchronized void dump ( final ReferenceContainerCache < ? extends Reference > cache , final File file , final ReferenceContainerArray < ? extends Reference > array ) {
if ( this . dumpQueue = = null | | this . controlQueue = = null | | ! isAlive ( ) ) {
2013-07-09 14:28:25 +02:00
log . warn ( " emergency dump of file " + file . getName ( ) ) ;
2011-06-30 17:37:14 +02:00
if ( ! cache . isEmpty ( ) ) cache . dump ( file , ( int ) Math . min ( MemoryControl . available ( ) / 3 , this . writeBufferSize ) , true ) ;
2009-03-30 08:22:27 +02:00
} else {
2011-03-14 23:52:30 +01:00
@SuppressWarnings ( " rawtypes " )
2011-06-30 17:37:14 +02:00
final
2010-06-26 12:32:47 +02:00
DumpJob < ? extends Reference > job = new DumpJob ( cache , file , array ) ;
2011-06-30 17:37:14 +02:00
// check if the dispatcher is running
if ( isAlive ( ) ) {
try {
2015-05-25 19:46:26 +02:00
this . dumpQueue . add ( job ) ;
2013-07-09 14:28:25 +02:00
log . info ( " appended dump job for file " + file . getName ( ) ) ;
2015-05-25 19:46:26 +02:00
} catch ( final IllegalStateException e ) {
log . warn ( " could not append dump job, emergency dump of file " + file . getName ( ) ) ;
2011-06-30 17:37:14 +02:00
cache . dump ( file , ( int ) Math . min ( MemoryControl . available ( ) / 3 , this . writeBufferSize ) , true ) ;
} finally {
this . controlQueue . release ( ) ;
2009-10-06 23:52:55 +02:00
}
2011-06-30 17:37:14 +02:00
} else {
job . dump ( ) ;
2013-07-09 14:28:25 +02:00
log . warn ( " dispatcher is not alive, just dumped file " + file . getName ( ) ) ;
2009-03-30 08:22:27 +02:00
}
2009-03-20 16:44:59 +01:00
}
}
2011-06-30 17:37:14 +02:00
2010-04-16 18:07:19 +02:00
protected synchronized int queueLength ( ) {
2011-06-30 17:37:14 +02:00
return ( this . controlQueue = = null | | ! isAlive ( ) ) ? 0 : this . controlQueue . availablePermits ( ) ;
2009-03-30 08:22:27 +02:00
}
2011-06-30 17:37:14 +02:00
protected synchronized void merge ( final File f1 , final File f2 , final ReferenceFactory < ? extends Reference > factory , final ArrayStack array , final File newFile ) {
if ( this . mergeQueue = = null | | this . controlQueue = = null | | ! isAlive ( ) ) {
2010-01-04 19:12:03 +01:00
if ( f2 = = null ) {
2013-07-09 14:28:25 +02:00
log . warn ( " emergency rewrite of file " + f1 . getName ( ) + " to " + newFile . getName ( ) ) ;
2010-01-04 19:12:03 +01:00
} else {
2013-07-09 14:28:25 +02:00
log . warn ( " emergency merge of files " + f1 . getName ( ) + " , " + f2 . getName ( ) + " to " + newFile . getName ( ) ) ;
2010-01-04 19:12:03 +01:00
}
2011-06-30 17:37:14 +02:00
array . mergeMount ( f1 , f2 , factory , newFile , ( int ) Math . min ( MemoryControl . available ( ) / 3 , this . writeBufferSize ) ) ;
2009-03-20 16:44:59 +01:00
} else {
2011-06-30 17:37:14 +02:00
final MergeJob job = new MergeJob ( f1 , f2 , factory , array , newFile ) ;
if ( isAlive ( ) ) {
try {
2015-03-18 21:57:41 +01:00
this . mergeQueue . add ( job ) ;
2010-01-04 19:12:03 +01:00
if ( f2 = = null ) {
2013-07-09 14:28:25 +02:00
log . info ( " appended rewrite job of file " + f1 . getName ( ) + " to " + newFile . getName ( ) ) ;
2010-01-04 19:12:03 +01:00
} else {
2013-07-09 14:28:25 +02:00
log . info ( " appended merge job of files " + f1 . getName ( ) + " , " + f2 . getName ( ) + " to " + newFile . getName ( ) ) ;
2010-01-04 19:12:03 +01:00
}
2015-05-25 19:46:26 +02:00
} catch ( final IllegalStateException e ) { // because mergeQueue size is 1, IllegalStateException could happen frequently (serial execution ensured in run() )
log . warn ( " Could not add merge job to queue: " + e . getMessage ( ) ) ;
2011-06-30 17:37:14 +02:00
} finally {
this . controlQueue . release ( ) ;
}
} else {
job . merge ( ) ;
if ( f2 = = null ) {
2013-07-09 14:28:25 +02:00
log . warn ( " dispatcher not running, merged files " + f1 . getName ( ) + " to " + newFile . getName ( ) ) ;
2009-10-06 23:52:55 +02:00
} else {
2013-07-09 14:28:25 +02:00
log . warn ( " dispatcher not running, rewrote file " + f1 . getName ( ) + " , " + f2 . getName ( ) + " to " + newFile . getName ( ) ) ;
2009-10-06 23:52:55 +02:00
}
2009-03-20 16:44:59 +01:00
}
}
}
2011-06-30 17:37:14 +02:00
2011-03-08 02:51:51 +01:00
@Override
2009-03-20 16:44:59 +01:00
public void run ( ) {
2009-03-30 08:22:27 +02:00
MergeJob mergeJob ;
2009-05-29 12:03:35 +02:00
DumpJob < ? extends Reference > dumpJob ;
2009-03-20 16:44:59 +01:00
try {
2009-08-30 01:35:03 +02:00
loop : while ( true ) try {
2011-06-30 17:37:14 +02:00
this . controlQueue . acquire ( ) ;
2009-03-30 08:22:27 +02:00
// prefer dump actions to flush memory to disc
2011-06-30 17:37:14 +02:00
if ( ! this . dumpQueue . isEmpty ( ) ) {
2009-05-04 23:37:59 +02:00
File f = null ;
2009-04-24 13:55:39 +02:00
try {
2011-06-30 17:37:14 +02:00
dumpJob = this . dumpQueue . take ( ) ;
2009-05-04 23:37:59 +02:00
f = dumpJob . file ;
2009-04-24 13:55:39 +02:00
dumpJob . dump ( ) ;
2011-06-30 17:37:14 +02:00
} catch ( final InterruptedException e ) {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job was interrupted (1) " , e ) ;
2011-09-05 02:09:48 +02:00
} catch ( final Throwable e ) {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job had errors (1), dump to " + f + " failed. " , e ) ;
2009-04-24 13:55:39 +02:00
}
2009-03-30 08:22:27 +02:00
continue loop ;
}
2011-06-30 17:37:14 +02:00
2009-03-30 08:22:27 +02:00
// otherwise do a merge operation
2011-08-24 14:07:53 +02:00
if ( ! this . mergeQueue . isEmpty ( ) & & ! MemoryControl . shortStatus ( ) ) {
2009-05-04 23:37:59 +02:00
File f = null , f1 = null , f2 = null ;
2009-04-24 13:55:39 +02:00
try {
2011-06-30 17:37:14 +02:00
mergeJob = this . mergeQueue . take ( ) ;
2009-05-04 23:37:59 +02:00
f = mergeJob . newFile ;
f1 = mergeJob . f1 ;
f2 = mergeJob . f2 ;
2009-04-24 13:55:39 +02:00
mergeJob . merge ( ) ;
2011-06-30 17:37:14 +02:00
} catch ( final InterruptedException e ) {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job was interrupted (2) " , e ) ;
2011-09-05 02:09:48 +02:00
} catch ( final Throwable e ) {
2010-01-04 19:12:03 +01:00
if ( f2 = = null ) {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job had errors (2), dump to " + f + " failed. Input file is " + f1 , e ) ;
2010-01-04 19:12:03 +01:00
} else {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job had errors (2), dump to " + f + " failed. Input files are " + f1 + " and " + f2 , e ) ;
2010-01-04 19:12:03 +01:00
}
2009-04-24 13:55:39 +02:00
}
2009-03-30 08:22:27 +02:00
continue loop ;
}
2011-06-30 17:37:14 +02:00
2009-04-28 09:24:28 +02:00
// check termination
if ( this . terminate ) {
2013-07-09 14:28:25 +02:00
log . info ( " caught termination signal " ) ;
2009-04-28 09:24:28 +02:00
break ;
}
2015-05-25 19:46:26 +02:00
2011-09-05 02:09:48 +02:00
} catch ( final Throwable e ) {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job failed (X) " , e ) ;
2009-03-20 16:44:59 +01:00
}
2013-07-09 14:28:25 +02:00
log . info ( " loop terminated " ) ;
2011-09-05 02:09:48 +02:00
} catch ( final Throwable e ) {
2013-07-09 14:28:25 +02:00
log . severe ( " main run job failed (4) " , e ) ;
2009-03-20 16:44:59 +01:00
} finally {
2013-07-09 14:28:25 +02:00
log . info ( " terminating run job " ) ;
2011-06-30 17:37:14 +02:00
this . controlQueue = null ;
this . dumpQueue = null ;
this . mergeQueue = null ;
this . termination . release ( ) ;
2009-03-20 16:44:59 +01:00
}
}
2011-06-30 17:37:14 +02:00
2011-03-14 23:52:30 +01:00
private class DumpJob < ReferenceType extends Reference > {
2011-06-30 17:37:14 +02:00
private final ReferenceContainerCache < ReferenceType > cache ;
private final File file ;
private final ReferenceContainerArray < ReferenceType > array ;
private DumpJob ( final ReferenceContainerCache < ReferenceType > cache , final File file , final ReferenceContainerArray < ReferenceType > array ) {
2009-03-30 08:22:27 +02:00
this . cache = cache ;
this . file = file ;
this . array = array ;
}
2010-04-16 18:07:19 +02:00
private void dump ( ) {
2009-03-30 08:22:27 +02:00
try {
2011-06-30 17:37:14 +02:00
if ( ! this . cache . isEmpty ( ) ) this . cache . dump ( this . file , ( int ) Math . min ( MemoryControl . available ( ) / 3 , IODispatcher . this . writeBufferSize ) , true ) ;
this . array . mountBLOBFile ( this . file ) ;
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2009-03-30 08:22:27 +02:00
}
}
}
2011-06-30 17:37:14 +02:00
2010-04-16 18:07:19 +02:00
private class MergeJob {
2009-03-20 16:44:59 +01:00
2011-06-30 17:37:14 +02:00
private final File f1 , f2 , newFile ;
private final ArrayStack array ;
private final ReferenceFactory < ? extends Reference > factory ;
2010-04-16 18:07:19 +02:00
private MergeJob (
2011-06-30 17:37:14 +02:00
final File f1 ,
final File f2 ,
final ReferenceFactory < ? extends Reference > factory ,
final ArrayStack array ,
final File newFile ) {
2009-03-20 16:44:59 +01:00
this . f1 = f1 ;
this . f2 = f2 ;
2009-05-29 12:03:35 +02:00
this . factory = factory ;
2009-03-20 16:44:59 +01:00
this . newFile = newFile ;
this . array = array ;
}
2010-04-16 18:07:19 +02:00
private File merge ( ) {
2011-06-30 17:37:14 +02:00
if ( ! this . f1 . exists ( ) ) {
2013-07-09 14:28:25 +02:00
log . warn ( " merge of file (1) " + this . f1 . getName ( ) + " failed: file does not exists " ) ;
2009-05-02 03:40:03 +02:00
return null ;
}
2011-06-30 17:37:14 +02:00
if ( this . f2 ! = null & & ! this . f2 . exists ( ) ) {
2013-07-09 14:28:25 +02:00
log . warn ( " merge of file (2) " + this . f2 . getName ( ) + " failed: file does not exists " ) ;
2009-05-02 03:40:03 +02:00
return null ;
}
2011-06-30 17:37:14 +02:00
return this . array . mergeMount ( this . f1 , this . f2 , this . factory , this . newFile , ( int ) Math . min ( MemoryControl . available ( ) / 3 , IODispatcher . this . writeBufferSize ) ) ;
2009-03-20 16:44:59 +01:00
}
}
}