// CrawlQueues.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 29.10.2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package de.anomic.crawler;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Hit;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ConnectionInfo;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.workflow.WorkflowJob;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.peers.dht.PeerSelection;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.Switchboard.indexingQueueEntry;
import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.NoticedURL.StackType;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.Response;
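
/**
 * Management of the crawler's work queues: the notice-URL stacks (local,
 * global, remote-triggered and no-load), the error and delegated URL
 * databases, and the pool of Loader worker threads that fetch queued
 * documents and hand them to the indexer.
 */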
public class CrawlQueues {

    private static final String ERROR_DB_FILENAME = "urlError4.db";
    private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";

    protected Switchboard sb;
    protected Log log;
    protected Map<Integer, Loader> workers; // mapping from url hash to Worker thread object
    private final ArrayList<String> remoteCrawlProviderHashes;

    public NoticedURL noticeURL;
    public ZURL errorURL, delegatedURL;

    public CrawlQueues(final Switchboard sb, final File queuePath) {
        this.sb = sb;
        this.log = new Log("CRAWLER");
        this.workers = new ConcurrentHashMap<Integer, Loader>();
        this.remoteCrawlProviderHashes = new ArrayList<String>();

        // start crawling management
        this.log.logConfig("Starting Crawling Management");
        this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
        FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
        this.errorURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
        this.delegatedURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
    }
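
    /**
     * Move the crawl queues to a new storage path. All open queues are closed
     * first; the error database is deleted and re-created at the new location.
     * @param newQueuePath the directory that shall hold the queue files
     */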
    public void relocate(final File newQueuePath) {
        close();
        this.workers = new ConcurrentHashMap<Integer, Loader>();
        this.remoteCrawlProviderHashes.clear();
        this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
        FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
        this.errorURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
        this.delegatedURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
    }
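
    /**
     * Interrupt all worker threads, wait for them to terminate and close the
     * notice, error and delegated URL queues.
     */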
    public synchronized void close() {
        // wait for all workers to finish
        for (final Loader w : this.workers.values()) {
            w.interrupt();
        }
        for (final Loader w : this.workers.values()) {
            try {
                w.join();
            } catch (final InterruptedException e) {
                Log.logException(e);
            }
        }
        this.noticeURL.close();
        this.errorURL.close();
        this.delegatedURL.close();
    }
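
    /**
     * Interrupt all workers and empty every queue. In contrast to close(),
     * the queues stay open and remain usable afterwards.
     */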
    public void clear() {
        // wait for all workers to finish
        for (final Loader w : this.workers.values()) {
            w.interrupt();
        }
        // TODO: wait some more time until all threads are finished
        this.workers.clear();
        this.remoteCrawlProviderHashes.clear();
        this.noticeURL.clear();
        try {
            this.errorURL.clear();
        } catch (final IOException e) {
            Log.logException(e);
        }
        try {
            this.delegatedURL.clear();
        } catch (final IOException e) {
            Log.logException(e);
        }
    }

    /**
     * Tests whether the given URL hash occurs in any database.
     * @param hash the URL hash to look up
     * @return the name of the database that holds the hash, or null if the hash does not exist
     */
    public String urlExists(final byte[] hash) {
        if (this.delegatedURL.exists(hash)) {
            return "delegated";
        }
        if (this.errorURL.exists(hash)) {
            return "errors";
        }
        if (this.noticeURL.existsInStack(hash)) {
            return "crawler";
        }
        for (final Loader worker : this.workers.values()) {
            if (Base64Order.enhancedCoder.equal(worker.request.url().hash(), hash)) {
                return "worker";
            }
        }
        return null;
    }
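
    /**
     * Remove the URL with the given hash from the notice, delegated and error queues.
     * @param hash the URL hash to remove
     */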
    public void urlRemove(final byte[] hash) {
        this.noticeURL.removeByURLHash(hash);
        this.delegatedURL.remove(hash);
        this.errorURL.remove(hash);
    }
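
    /**
     * Resolve a URL hash to its URL by searching the delegated, error, worker
     * and notice queues in that order.
     * @param urlhash the URL hash to resolve
     * @return the URL for the given hash, or null if it is not known to the crawler
     */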
    public DigestURI getURL(final byte[] urlhash) {
        assert urlhash != null;
        if (urlhash == null || urlhash.length == 0) {
            return null;
        }
        ZURL.Entry ee = this.delegatedURL.get(urlhash);
        if (ee != null) {
            return ee.url();
        }
        ee = this.errorURL.get(urlhash);
        if (ee != null) {
            return ee.url();
        }
        for (final Loader w : this.workers.values()) {
            if (Base64Order.enhancedCoder.equal(w.request.url().hash(), urlhash)) {
                return w.request.url();
            }
        }
        final Request ne = this.noticeURL.get(urlhash);
        if (ne != null) {
            return ne.url();
        }
        return null;
    }
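
    /**
     * Interrupt worker threads that have been running longer than the
     * configured crawler client timeout.
     */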
    public void cleanup() {
        // interrupt workers that exceeded the client timeout
        final int timeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
        for (final Loader w : this.workers.values()) {
            if (w.age() > timeout) {
                w.interrupt();
            }
        }
    }
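
    /**
     * @return a snapshot of the requests currently being processed by worker threads
     */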
    public Request[] activeWorkerEntries() {
        synchronized (this.workers) {
            final Request[] e = new Request[this.workers.size()];
            int i = 0;
            for (final Loader w : this.workers.values()) {
                if (i >= e.length) {
                    break;
                }
                e[i++] = w.request;
            }
            return e;
        }
    }
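
    /**
     * @return the number of entries waiting in the local crawl queue, including no-load entries
     */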
    public int coreCrawlJobSize() {
        return this.noticeURL.stackSize(NoticedURL.StackType.LOCAL) + this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD);
    }
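
    /**
     * De-queue one entry from the local crawl stack and hand it to a Loader
     * (or directly to the indexer for no-load entries). If the local stack
     * runs low, jobs are shifted from the global (limit) stack first.
     * @return true if an entry was processed, false if the queue is empty, paused or overloaded
     */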
    public boolean coreCrawlJob() {
        final boolean robinsonPrivateCase = (this.sb.isRobinsonMode() &&
                !this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER));

        if ((robinsonPrivateCase || coreCrawlJobSize() <= 20) && limitCrawlJobSize() > 0) {
            // move some tasks to the core crawl job so we have something to do
            final int toshift = Math.min(10, limitCrawlJobSize()); // this cannot be a big number because the balancer makes a forced waiting if it cannot balance
            for (int i = 0; i < toshift; i++) {
                this.noticeURL.shift(NoticedURL.StackType.GLOBAL, NoticedURL.StackType.LOCAL, this.sb.crawler, this.sb.robots);
            }
            this.log.logInfo("shifted " + toshift + " jobs from global crawl to local crawl (coreCrawlJobSize()=" + coreCrawlJobSize() +
                    ", limitCrawlJobSize()=" + limitCrawlJobSize() + ", cluster.mode=" + this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "") +
                    ", robinsonMode=" + ((this.sb.isRobinsonMode()) ? "on" : "off"));
        }

        final String queueCheckCore = loadIsPossible(NoticedURL.StackType.LOCAL);
        final String queueCheckNoload = loadIsPossible(NoticedURL.StackType.NOLOAD);
        if (queueCheckCore != null && queueCheckNoload != null) {
            if (this.log.isFine()) {
                this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
            }
            return false;
        }

        if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
            if (this.log.isFine()) {
                this.log.logFine("omitting de-queue/local: paused");
            }
            return false;
        }

        // do a local crawl
        Request urlEntry;
        while (this.noticeURL.stackSize(NoticedURL.StackType.LOCAL) > 0 || this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) > 0) {
            final String stats = "LOCALCRAWL[" +
                    this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) + ", " +
                    this.noticeURL.stackSize(NoticedURL.StackType.LOCAL) + ", " +
                    this.noticeURL.stackSize(NoticedURL.StackType.GLOBAL) + ", " +
                    this.noticeURL.stackSize(NoticedURL.StackType.OVERHANG) + ", " +
                    this.noticeURL.stackSize(NoticedURL.StackType.REMOTE) + "]";
            try {
                if (this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) > 0) {
                    // get one entry that will not be loaded, just indexed
                    urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler, this.sb.robots);
                    if (urlEntry == null) {
                        continue;
                    }
                    final String profileHandle = urlEntry.profileHandle();
                    if (profileHandle == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(profileHandle));
                    if (profile == null) {
                        this.log.logSevere(stats + ": LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    try {
                        this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(new Response(urlEntry, profile), null, null));
                        Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
                    } catch (final InterruptedException e) {
                        Log.logException(e);
                    }
                    return true;
                }

                urlEntry = this.noticeURL.pop(NoticedURL.StackType.LOCAL, true, this.sb.crawler, this.sb.robots);
                if (urlEntry == null) {
                    continue;
                }
                final String profileHandle = urlEntry.profileHandle();
                // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
                // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
                if (profileHandle == null) {
                    this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                    return true;
                }
                load(urlEntry, stats, profileHandle);
                return true;
            } catch (final IOException e) {
                this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
                if (e.getMessage().indexOf("hash is null", 0) >= 0) {
                    this.noticeURL.clear(NoticedURL.StackType.LOCAL);
                }
            }
        }
        return true;
    }

    /**
     * Perform some checks whether the crawl request is valid and, if so, start a Loader for it.
     * @param urlEntry the crawl request to load
     * @param stats prefix string for log messages
     * @param profileHandle the handle of the crawl profile that governs this request
     */
    private void load(final Request urlEntry, final String stats, final String profileHandle) {
        final CrawlProfile profile = this.sb.crawler.getActive(UTF8.getBytes(profileHandle));
        if (profile != null) {
            // check if the protocol is supported
            final DigestURI url = urlEntry.url();
            final String urlProtocol = url.getProtocol();
            if (this.sb.loader.isSupportedProtocol(urlProtocol)) {
                if (this.log.isFine()) {
                    this.log.logFine(stats + ": URL=" + urlEntry.url()
                            + ", initiator=" + ((urlEntry.initiator() == null) ? "" : ASCII.String(urlEntry.initiator()))
                            + ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
                            + ", depth=" + urlEntry.depth()
                            + ", crawlDepth=" + profile.depth()
                            + ", must-match=" + profile.urlMustMatchPattern().toString()
                            + ", must-not-match=" + profile.urlMustNotMatchPattern().toString()
                            + ", permission=" + ((this.sb.peers == null) ? "undefined" : (((this.sb.peers.mySeed().isSenior()) || (this.sb.peers.mySeed().isPrincipal())) ? "true" : "false")));
                }

                // work off one crawl stack entry
                if (urlEntry == null || urlEntry.url() == null) {
                    this.log.logInfo(stats + ": urlEntry = null");
                } else {
                    new Loader(urlEntry);
                }

            } else {
                this.log.logSevere("Unsupported protocol in URL '" + url.toString() + "'");
            }
        } else {
            this.log.logWarning(stats + ": LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
        }
    }

    /**
     * If crawling was paused, wait until we are notified to continue;
     * this blocks until the pause is ended.
     * @param crawljob the name of the crawl job to check
     * @return true if the wait was interrupted and the job should be skipped, false once crawling may continue
     */
    private boolean isPaused(final String crawljob) {
        final Object[] status = this.sb.crawlJobsStatus.get(crawljob);
        boolean pauseEnded = false;
        synchronized (status[SwitchboardConstants.CRAWLJOB_SYNC]) {
            if (((Boolean) status[SwitchboardConstants.CRAWLJOB_STATUS]).booleanValue()) {
                try {
                    status[SwitchboardConstants.CRAWLJOB_SYNC].wait();
                } catch (final InterruptedException e) {
                    pauseEnded = true;
                }
            }
        }
        return pauseEnded;
    }

    /**
     * Checks whether the crawl queue has elements and a new load would not
     * exceed the thread limit.
     * @param stackType the crawl stack to check
     * @return null if a load is possible, otherwise a string describing why it is not
     */
    private String loadIsPossible(final StackType stackType) {
        //System.out.println("stacksize = " + noticeURL.stackSize(stackType));
        if (this.noticeURL.stackSize(stackType) == 0) {
            //log.logDebug("GlobalCrawl: queue is empty");
            return "stack is empty";
        }

        // check the worker threads
        final int maxWorkers = (int) this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10);
        if (this.workers.size() >= maxWorkers) {
            // too many worker threads, try a cleanup
            cleanup();
        }
        // check again
        if (this.workers.size() >= maxWorkers) {
            return "too many workers active: " + this.workers.size();
        }

        final String cautionCause = this.sb.onlineCaution();
        if (cautionCause != null) {
            return "online caution: " + cautionCause;
        }
        return null;
    }
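
    /**
     * Ask another peer for URLs from its remote crawl stack and place them on
     * our own crawl stacker. Runs only if this peer accepts remote crawls, is
     * active, and no local crawl is going on.
     * @return true if remote crawl entries were loaded, false otherwise
     */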
    public boolean remoteCrawlLoaderJob() {
        // check if we are allowed to crawl urls provided by other peers
        if (!this.sb.peers.mySeed().getFlagAcceptRemoteCrawl()) {
            //this.log.logInfo("remoteCrawlLoaderJob: not done, we are not allowed to do that");
            return false;
        }

        // check if we are a senior peer
        if (!this.sb.peers.mySeed().isActive()) {
            //this.log.logInfo("remoteCrawlLoaderJob: not done, this should be a senior or principal peer");
            return false;
        }

        if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) {
            // try a cleanup
            cleanup();
        }
        // check again
        if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) {
            if (this.log.isFine()) {
                this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
            }
            return false;
        }

        final String cautionCause = this.sb.onlineCaution();
        if (cautionCause != null) {
            if (this.log.isFine()) {
                this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
            }
            return false;
        }

        if (remoteTriggeredCrawlJobSize() > 200) {
            if (this.log.isFine()) {
                this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
            }
            return false;
        }

        if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) {
            if (this.log.isFine()) {
                this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
            }
            return false;
        }

        // check if we have an entry in the provider list, otherwise fill the list
        Seed seed;
        if (this.remoteCrawlProviderHashes.isEmpty()) {
            if (this.sb.peers != null && this.sb.peers.sizeConnected() > 0) {
                final Iterator<Seed> e = PeerSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
                while (e.hasNext()) {
                    seed = e.next();
                    if (seed != null) {
                        this.remoteCrawlProviderHashes.add(seed.hash);
                    }
                }
            }
        }
        if (this.remoteCrawlProviderHashes.isEmpty()) {
            return false;
        }

        // take one entry from the provider list and load the entries from the remote peer
        seed = null;
        String hash = null;
        while (seed == null && !this.remoteCrawlProviderHashes.isEmpty()) {
            hash = this.remoteCrawlProviderHashes.remove(this.remoteCrawlProviderHashes.size() - 1);
            if (hash == null) {
                continue;
            }
            seed = this.sb.peers.get(hash);
            if (seed == null) {
                continue;
            }
            // check if the peer is inside our cluster
            if ((this.sb.isRobinsonMode()) && (!this.sb.isInMyCluster(seed))) {
                seed = null;
                continue;
            }
        }
        if (seed == null) {
            return false;
        }

        // we know a peer which should provide remote crawl entries. load them now.
        final RSSFeed feed = Protocol.queryRemoteCrawlURLs(this.sb.peers, seed, 60, 8000);
        if (feed == null || feed.isEmpty()) {
            // something is wrong with this provider. To prevent getting stuck
            // with this peer we remove it from the peer list.
            this.sb.peers.peerActions.peerDeparture(seed, "no results from provided remote crawls");
            // try again and ask another peer
            return remoteCrawlLoaderJob();
        }

        // parse the rss
        DigestURI url, referrer;
        Date loaddate;
        for (final Hit item : feed) {
            //System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate());

            // put url on remote crawl stack
            try {
                url = new DigestURI(item.getLink());
            } catch (final MalformedURLException e) {
                continue;
            }
            try {
                referrer = new DigestURI(item.getReferrer());
            } catch (final MalformedURLException e) {
                referrer = null;
            }
            loaddate = item.getPubDate();
            final String urlRejectReason = this.sb.crawlStacker.urlInAcceptedDomain(url);
            if (urlRejectReason == null) {
                // stack url
                if (this.sb.getLog().isFinest()) {
                    this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
                }
                this.sb.crawlStacker.enqueueEntry(new Request(
                        ASCII.getBytes(hash),
                        url,
                        (referrer == null) ? null : referrer.hash(),
                        item.getDescription(),
                        loaddate,
                        this.sb.crawler.defaultRemoteProfile.handle(),
                        0,
                        0,
                        0,
                        item.getSize()
                        ));
            } else {
                this.log.logWarning("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
            }
        }
        return true;
    }

    /**
     * @param url the URL to render as a string
     * @return the normalized form of the URL, or "null" if the URL is null
     */
    private static String urlToString(final DigestURI url) {
        return (url == null ? "null" : url.toNormalform(true, false));
    }

    public int limitCrawlJobSize() {
        return this.noticeURL.stackSize(NoticedURL.StackType.GLOBAL);
    }

    public int noloadCrawlJobSize() {
        return this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD);
    }

    public int remoteTriggeredCrawlJobSize() {
        return this.noticeURL.stackSize(NoticedURL.StackType.REMOTE);
    }
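
    /**
     * Work off one crawl request that another peer has placed on our remote
     * crawl stack.
     * @return true if an entry was processed, false if the queue is empty or paused
     */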
    public boolean remoteTriggeredCrawlJob() {
        // work off crawl requests that had been placed by other peers to our crawl stack

        // do nothing if either there are private processes to be done
        // or there is no global crawl on the stack
        final String queueCheck = loadIsPossible(NoticedURL.StackType.REMOTE);
        if (queueCheck != null) {
            if (this.log.isFinest()) {
                this.log.logFinest("omitting de-queue/remote: " + queueCheck);
            }
            return false;
        }

        if (isPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
            if (this.log.isFinest()) {
                this.log.logFinest("omitting de-queue/remote: paused");
            }
            return false;
        }

        // we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
        final String stats = "REMOTETRIGGEREDCRAWL[" + this.noticeURL.stackSize(NoticedURL.StackType.LOCAL) + ", " + this.noticeURL.stackSize(NoticedURL.StackType.GLOBAL) + ", " + this.noticeURL.stackSize(NoticedURL.StackType.OVERHANG) + ", "
                + this.noticeURL.stackSize(NoticedURL.StackType.REMOTE) + "]";
        try {
            final Request urlEntry = this.noticeURL.pop(NoticedURL.StackType.REMOTE, true, this.sb.crawler, this.sb.robots);
            final String profileHandle = urlEntry.profileHandle();
            // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
            // profileHandle = " + profileHandle + ", urlEntry.url = " +
            // urlEntry.url());
            load(urlEntry, stats, profileHandle);
            return true;
        } catch (final IOException e) {
            this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
            if (e.getMessage().indexOf("hash is null", 0) >= 0) {
                this.noticeURL.clear(NoticedURL.StackType.REMOTE);
            }
            return true;
        }
    }

    public int workerSize() {
        return this.workers.size();
    }
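
    /**
     * A Loader is a worker thread that checks the robots.txt rules for one
     * crawl request, loads the resource and hands the response over to the
     * indexer. It registers itself in the workers map and removes itself
     * again when done.
     */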
    protected final class Loader extends Thread {

        protected Request request;
        private final Integer code;
        private final long start;

        public Loader(final Request entry) {
            this.start = System.currentTimeMillis();
            this.request = entry;
            this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
            this.code = Integer.valueOf(entry.hashCode());
            if (!CrawlQueues.this.workers.containsKey(this.code)) {
                CrawlQueues.this.workers.put(this.code, this);
                try {
                    start();
                } catch (final OutOfMemoryError e) {
                    Log.logWarning("CrawlQueues", "crawlWorker sequential fail-over: " + e.getMessage());
                    run();
                }
            }
            setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not degrade other functions
        }

        public long age() {
            return System.currentTimeMillis() - this.start;
        }

        @Override
        public void run() {
            try {
                // checking robots.txt for http(s) resources
                this.request.setStatus("worker-checkingrobots", WorkflowJob.STATUS_STARTED);
                RobotsTxtEntry robotsEntry;
                if ((this.request.url().getProtocol().equals("http") || this.request.url().getProtocol().equals("https")) &&
                        (robotsEntry = CrawlQueues.this.sb.robots.getEntry(this.request.url(), CrawlQueues.this.sb.peers.myBotIDs())) != null &&
                        robotsEntry.isDisallowed(this.request.url())) {
                    //if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
                    CrawlQueues.this.errorURL.push(
                            this.request,
                            ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                            new Date(),
                            1,
                            FailCategory.FINAL_ROBOTS_RULE,
                            "denied by robots.txt", -1);
                    this.request.setStatus("worker-disallowed", WorkflowJob.STATUS_FINISHED);
                } else {
                    // starting a load from the internet
                    this.request.setStatus("worker-loading", WorkflowJob.STATUS_RUNNING);
                    String result = null;

                    // load a resource and push queue entry to switchboard queue
                    // returns null if everything went fine, a fail reason string if a problem occurred
                    try {
                        this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
                        final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
                        final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER);
                        if (response == null) {
                            this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                            if (CrawlQueues.this.log.isFine()) {
                                CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
                            }
                            result = "no content (possibly caused by cache policy)";
                        } else {
                            this.request.setStatus("loaded", WorkflowJob.STATUS_RUNNING);
                            final String storedFailMessage = CrawlQueues.this.sb.toIndexer(response);
                            this.request.setStatus("enqueued-" + ((storedFailMessage == null) ? "ok" : "fail"), WorkflowJob.STATUS_FINISHED);
                            result = (storedFailMessage == null) ? null : "not enqueued to indexer: " + storedFailMessage;
                        }
                    } catch (final IOException e) {
                        this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                        if (CrawlQueues.this.log.isFine()) {
                            CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
                        }
                        result = "load error - " + e.getMessage();
                    }

                    if (result != null) {
                        CrawlQueues.this.errorURL.push(
                                this.request,
                                ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                                new Date(),
                                1,
                                FailCategory.TEMPORARY_NETWORK_FAILURE,
                                "cannot load: " + result, -1);
                        this.request.setStatus("worker-error", WorkflowJob.STATUS_FINISHED);
                    } else {
                        this.request.setStatus("worker-processed", WorkflowJob.STATUS_FINISHED);
                    }
                }
            } catch (final Exception e) {
                CrawlQueues.this.errorURL.push(
                        this.request,
                        ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                        new Date(),
                        1,
                        FailCategory.TEMPORARY_NETWORK_FAILURE,
                        e.getMessage() + " - in worker", -1);
                Log.logException(e);
                // Client.initConnectionManager();
                this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED);
            } finally {
                final Loader w = CrawlQueues.this.workers.remove(this.code);
                assert w != null;
            }
        }
    }
}