// kelondroMSetTools.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 28.12.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-10-10 03:14:19 +02:00
package net.yacy.kelondro.util ;
2005-04-07 21:19:42 +02:00
2005-07-12 17:09:35 +02:00
import java.io.BufferedReader ;
2006-09-30 00:27:20 +02:00
import java.io.File ;
2005-07-12 17:09:35 +02:00
import java.io.FileInputStream ;
2006-09-30 00:27:20 +02:00
import java.io.IOException ;
import java.io.InputStreamReader ;
2007-02-24 14:56:32 +01:00
import java.util.ArrayList ;
2006-09-12 13:13:27 +02:00
import java.util.Collection ;
2005-05-05 07:32:19 +02:00
import java.util.Comparator ;
2010-11-03 21:58:50 +01:00
import java.util.ConcurrentModificationException ;
2005-05-05 07:32:19 +02:00
import java.util.Iterator ;
2010-11-28 03:57:31 +01:00
import java.util.List ;
2006-09-13 19:13:28 +02:00
import java.util.Map ;
2006-09-30 00:27:20 +02:00
import java.util.Set ;
2010-11-28 03:57:31 +01:00
import java.util.SortedMap ;
import java.util.SortedSet ;
2005-05-05 07:32:19 +02:00
import java.util.TreeMap ;
import java.util.TreeSet ;
2005-04-07 21:19:42 +02:00
2011-03-07 21:36:40 +01:00
import net.yacy.cora.document.UTF8 ;
2012-07-27 12:13:53 +02:00
import net.yacy.cora.storage.HandleSet ;
2010-11-03 21:58:50 +01:00
import net.yacy.kelondro.logging.Log ;
2010-04-15 15:22:59 +02:00
2010-11-28 03:57:31 +01:00
public final class SetTools {
2005-04-07 21:19:42 +02:00
2012-02-01 18:13:31 +01:00
2006-01-11 01:32:44 +01:00
//public static Comparator fastStringComparator = fastStringComparator(true);
2005-04-07 21:19:42 +02:00
// ------------------------------------------------------------------------------------------------
// helper methods
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static int log2a ( int x ) {
2005-06-23 04:07:45 +02:00
// this computes 1 + log2
2010-04-15 15:22:59 +02:00
// it is the number of bits in x, not the logarithm by 2
2008-01-11 01:12:01 +01:00
int l = 0 ;
while ( x > 0 ) { x = x > > > 1 ; l + + ; }
return l ;
2005-04-07 21:19:42 +02:00
}
// ------------------------------------------------------------------------------------------------
// join
// We distinguish two principal solutions:
// - constructive join (generate a new data structure)
// - destructive join (remove non-valid elements from the given data structure)
// The algorithm that performs the join can also be of two kinds:
// - join by pairwise enumeration
// - join by iterative tests (where we distinguish left-right and right-left tests)
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static < A , B > SortedMap < A , B > joinConstructive ( final Collection < SortedMap < A , B > > maps , final boolean concatStrings ) {
2006-09-12 13:13:27 +02:00
// this joins all TreeMap(s) contained in maps
2012-02-01 18:13:31 +01:00
2006-09-12 13:13:27 +02:00
// first order entities by their size
2010-11-28 03:57:31 +01:00
final SortedMap < Long , SortedMap < A , B > > orderMap = new TreeMap < Long , SortedMap < A , B > > ( ) ;
SortedMap < A , B > singleMap ;
final Iterator < SortedMap < A , B > > i = maps . iterator ( ) ;
2006-09-12 13:13:27 +02:00
int count = 0 ;
while ( i . hasNext ( ) ) {
// get next entity:
2007-12-28 19:47:45 +01:00
singleMap = i . next ( ) ;
2012-02-01 18:13:31 +01:00
2006-09-12 13:13:27 +02:00
// check result
2009-12-02 01:37:59 +01:00
if ( ( singleMap = = null ) | | ( singleMap . isEmpty ( ) ) ) return new TreeMap < A , B > ( ) ;
2012-02-01 18:13:31 +01:00
2006-09-12 13:13:27 +02:00
// store result in order of result size
2008-08-06 21:43:12 +02:00
orderMap . put ( Long . valueOf ( singleMap . size ( ) * 1000 + count ) , singleMap ) ;
2006-09-12 13:13:27 +02:00
count + + ;
}
2012-02-01 18:13:31 +01:00
2006-09-12 13:13:27 +02:00
// check if there is any result
2009-12-02 01:37:59 +01:00
if ( orderMap . isEmpty ( ) ) return new TreeMap < A , B > ( ) ;
2012-02-01 18:13:31 +01:00
2006-09-12 13:13:27 +02:00
// we now must pairwise build up a conjunction of these maps
2008-06-06 18:01:27 +02:00
Long k = orderMap . firstKey ( ) ; // the smallest, which means, the one with the least entries
2010-11-28 03:57:31 +01:00
SortedMap < A , B > mapA , mapB , joinResult = orderMap . remove ( k ) ;
2009-12-02 01:37:59 +01:00
while ( ! orderMap . isEmpty ( ) & & ! joinResult . isEmpty ( ) ) {
2006-09-12 13:13:27 +02:00
// take the first element of map which is a result and combine it with result
2008-06-06 18:01:27 +02:00
k = orderMap . firstKey ( ) ; // the next smallest...
2006-09-12 13:13:27 +02:00
mapA = joinResult ;
2008-06-06 18:01:27 +02:00
mapB = orderMap . remove ( k ) ;
2007-08-02 02:42:37 +02:00
joinResult = joinConstructiveByTest ( mapA , mapB , concatStrings ) ; // TODO: better with enumeration?
2006-09-12 13:13:27 +02:00
// free resources
mapA = null ;
mapB = null ;
}
// in 'searchResult' is now the combined search result
2009-12-02 01:37:59 +01:00
if ( joinResult . isEmpty ( ) ) return new TreeMap < A , B > ( ) ;
2006-09-12 13:13:27 +02:00
return joinResult ;
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static < A , B > SortedMap < A , B > joinConstructive ( final SortedMap < A , B > map1 , final SortedMap < A , B > map2 , final boolean concatStrings ) {
2006-09-13 19:13:28 +02:00
// comparators must be equal
if ( ( map1 = = null ) | | ( map2 = = null ) ) return null ;
if ( map1 . comparator ( ) ! = map2 . comparator ( ) ) return null ;
2009-12-02 01:37:59 +01:00
if ( map1 . isEmpty ( ) | | map2 . isEmpty ( ) ) return new TreeMap < A , B > ( map1 . comparator ( ) ) ;
2005-04-07 21:19:42 +02:00
2006-09-13 19:13:28 +02:00
// decide which method to use
2008-08-02 14:12:04 +02:00
final int high = ( ( map1 . size ( ) > map2 . size ( ) ) ? map1 . size ( ) : map2 . size ( ) ) ;
final int low = ( ( map1 . size ( ) > map2 . size ( ) ) ? map2 . size ( ) : map1 . size ( ) ) ;
final int stepsEnum = 10 * ( high + low - 1 ) ;
final int stepsTest = 12 * log2a ( high ) * low ;
2005-04-07 21:19:42 +02:00
2006-09-13 19:13:28 +02:00
// start most efficient method
if ( stepsEnum > stepsTest ) {
if ( map1 . size ( ) > map2 . size ( ) ) return joinConstructiveByTest ( map2 , map1 , concatStrings ) ;
return joinConstructiveByTest ( map1 , map2 , concatStrings ) ;
}
return joinConstructiveByEnumeration ( map1 , map2 , concatStrings ) ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2008-01-19 01:40:19 +01:00
@SuppressWarnings ( " unchecked " )
2010-11-28 03:57:31 +01:00
private static < A , B > SortedMap < A , B > joinConstructiveByTest ( final SortedMap < A , B > small , final SortedMap < A , B > large , final boolean concatStrings ) {
final SortedMap < A , B > result = new TreeMap < A , B > ( large . comparator ( ) ) ;
2011-02-24 11:32:46 +01:00
synchronized ( small ) {
final Iterator < Map . Entry < A , B > > mi = small . entrySet ( ) . iterator ( ) ;
2010-11-03 21:58:50 +01:00
Map . Entry < A , B > mentry1 ;
B mobj2 ;
loop : while ( mi . hasNext ( ) ) {
try {
mentry1 = mi . next ( ) ;
2011-02-24 11:32:46 +01:00
synchronized ( large ) {
mobj2 = large . get ( mentry1 . getKey ( ) ) ;
}
2010-11-03 21:58:50 +01:00
if ( mobj2 ! = null ) {
if ( mentry1 . getValue ( ) instanceof String ) {
2011-03-08 00:12:39 +01:00
result . put ( mentry1 . getKey ( ) , ( B ) ( ( concatStrings ) ? ( ( ( String ) mentry1 . getValue ( ) ) + ( String ) mobj2 ) : ( String ) mentry1 . getValue ( ) ) ) ;
} else if ( mentry1 . getValue ( ) instanceof StringBuilder ) {
result . put ( mentry1 . getKey ( ) , ( B ) ( ( concatStrings ) ? ( ( ( StringBuilder ) mentry1 . getValue ( ) ) . append ( ( StringBuilder ) mobj2 ) ) : mentry1 . getValue ( ) ) ) ;
2010-11-03 21:58:50 +01:00
} else {
result . put ( mentry1 . getKey ( ) , mentry1 . getValue ( ) ) ;
}
}
} catch ( ConcurrentModificationException e ) {
Log . logWarning ( " SetTools " , e . getMessage ( ) , e ) ;
break loop ;
2007-12-28 19:47:45 +01:00
}
}
2006-09-13 19:13:28 +02:00
}
return result ;
2005-04-07 21:19:42 +02:00
}
2008-01-19 01:40:19 +01:00
@SuppressWarnings ( " unchecked " )
2010-11-28 03:57:31 +01:00
private static < A , B > SortedMap < A , B > joinConstructiveByEnumeration ( final SortedMap < A , B > map1 , final SortedMap < A , B > map2 , final boolean concatStrings ) {
2007-12-28 19:47:45 +01:00
// implement pairwise enumeration
2008-08-02 14:12:04 +02:00
final Comparator < ? super A > comp = map1 . comparator ( ) ;
final Iterator < Map . Entry < A , B > > mi1 = map1 . entrySet ( ) . iterator ( ) ;
final Iterator < Map . Entry < A , B > > mi2 = map2 . entrySet ( ) . iterator ( ) ;
2010-11-28 03:57:31 +01:00
final SortedMap < A , B > result = new TreeMap < A , B > ( map1 . comparator ( ) ) ;
2006-09-13 19:13:28 +02:00
int c ;
if ( ( mi1 . hasNext ( ) ) & & ( mi2 . hasNext ( ) ) ) {
2007-12-28 19:47:45 +01:00
Map . Entry < A , B > mentry1 = mi1 . next ( ) ;
Map . Entry < A , B > mentry2 = mi2 . next ( ) ;
2006-09-13 19:13:28 +02:00
while ( true ) {
2008-01-11 01:12:01 +01:00
c = comp . compare ( mentry1 . getKey ( ) , mentry2 . getKey ( ) ) ;
2006-09-13 19:13:28 +02:00
if ( c < 0 ) {
2007-12-28 19:47:45 +01:00
if ( mi1 . hasNext ( ) ) mentry1 = mi1 . next ( ) ; else break ;
2006-09-13 19:13:28 +02:00
} else if ( c > 0 ) {
2007-12-28 19:47:45 +01:00
if ( mi2 . hasNext ( ) ) mentry2 = mi2 . next ( ) ; else break ;
2006-09-13 19:13:28 +02:00
} else {
2007-12-28 19:47:45 +01:00
if ( mentry1 . getValue ( ) instanceof String ) {
2011-03-08 00:12:39 +01:00
result . put ( mentry1 . getKey ( ) , ( B ) ( ( concatStrings ) ? ( ( ( String ) mentry1 . getValue ( ) ) + ( String ) mentry2 . getValue ( ) ) : ( String ) mentry1 . getValue ( ) ) ) ;
} else if ( mentry1 . getValue ( ) instanceof StringBuilder ) {
result . put ( mentry1 . getKey ( ) , ( B ) ( ( concatStrings ) ? ( ( ( StringBuilder ) mentry1 . getValue ( ) ) . append ( ( StringBuilder ) mentry2 . getValue ( ) ) ) : ( StringBuilder ) mentry1 . getValue ( ) ) ) ;
2007-12-28 19:47:45 +01:00
} else {
result . put ( mentry1 . getKey ( ) , mentry1 . getValue ( ) ) ;
}
if ( mi1 . hasNext ( ) ) mentry1 = mi1 . next ( ) ; else break ;
if ( mi2 . hasNext ( ) ) mentry2 = mi2 . next ( ) ; else break ;
2006-09-13 19:13:28 +02:00
}
}
}
return result ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2005-04-07 21:19:42 +02:00
// now the same for set-set
2010-11-28 03:57:31 +01:00
public static < A > SortedSet < A > joinConstructive ( final SortedSet < A > set1 , final SortedSet < A > set2 ) {
2008-01-11 01:12:01 +01:00
// comparators must be equal
2005-04-07 21:19:42 +02:00
if ( ( set1 = = null ) | | ( set2 = = null ) ) return null ;
2008-01-11 01:12:01 +01:00
if ( set1 . comparator ( ) ! = set2 . comparator ( ) ) return null ;
2009-12-02 01:37:59 +01:00
if ( set1 . isEmpty ( ) | | set2 . isEmpty ( ) ) return new TreeSet < A > ( set1 . comparator ( ) ) ;
2005-04-07 21:19:42 +02:00
2008-01-11 01:12:01 +01:00
// decide which method to use
2008-08-02 14:12:04 +02:00
final int high = ( ( set1 . size ( ) > set2 . size ( ) ) ? set1 . size ( ) : set2 . size ( ) ) ;
final int low = ( ( set1 . size ( ) > set2 . size ( ) ) ? set2 . size ( ) : set1 . size ( ) ) ;
final int stepsEnum = 10 * ( high + low - 1 ) ;
final int stepsTest = 12 * log2a ( high ) * low ;
2005-04-07 21:19:42 +02:00
2008-01-11 01:12:01 +01:00
// start most efficient method
if ( stepsEnum > stepsTest ) {
if ( set1 . size ( ) < set2 . size ( ) ) return joinConstructiveByTest ( set1 , set2 ) ;
return joinConstructiveByTest ( set2 , set1 ) ;
}
return joinConstructiveByEnumeration ( set1 , set2 ) ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
public static < A > SortedSet < A > joinConstructiveByTest ( final Collection < A > small , final SortedSet < A > large ) {
2008-08-02 14:12:04 +02:00
final Iterator < A > mi = small . iterator ( ) ;
2012-02-01 18:13:31 +01:00
final SortedSet < A > result = new TreeSet < A > ( large . comparator ( ) ) ;
2008-01-11 01:12:01 +01:00
A o ;
while ( mi . hasNext ( ) ) {
o = mi . next ( ) ;
if ( large . contains ( o ) ) result . add ( o ) ;
}
return result ;
2005-04-07 21:19:42 +02:00
}
2010-11-28 03:57:31 +01:00
private static < A > SortedSet < A > joinConstructiveByEnumeration ( final SortedSet < A > set1 , final SortedSet < A > set2 ) {
2010-04-15 15:22:59 +02:00
// implement pairwise enumeration
2008-08-02 14:12:04 +02:00
final Comparator < ? super A > comp = set1 . comparator ( ) ;
final Iterator < A > mi = set1 . iterator ( ) ;
final Iterator < A > si = set2 . iterator ( ) ;
2010-11-28 03:57:31 +01:00
final SortedSet < A > result = new TreeSet < A > ( set1 . comparator ( ) ) ;
2008-01-11 01:12:01 +01:00
int c ;
if ( ( mi . hasNext ( ) ) & & ( si . hasNext ( ) ) ) {
A mobj = mi . next ( ) ;
A sobj = si . next ( ) ;
while ( true ) {
c = comp . compare ( mobj , sobj ) ;
if ( c < 0 ) {
if ( mi . hasNext ( ) ) mobj = mi . next ( ) ; else break ;
} else if ( c > 0 ) {
if ( si . hasNext ( ) ) sobj = si . next ( ) ; else break ;
} else {
result . add ( mobj ) ;
if ( mi . hasNext ( ) ) mobj = mi . next ( ) ; else break ;
if ( si . hasNext ( ) ) sobj = si . next ( ) ; else break ;
}
}
}
return result ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2010-06-22 14:28:53 +02:00
/ * *
* test if one set is totally included in another set
* @param < A >
* @param small
* @param large
* @return true if the small set is completely included in the large set
* /
2010-11-28 03:57:31 +01:00
public static < A > boolean totalInclusion ( final Set < A > small , final Set < A > large ) {
2010-06-22 14:28:53 +02:00
for ( A o : small ) {
if ( ! large . contains ( o ) ) return false ;
}
return true ;
2012-02-01 18:13:31 +01:00
}
2010-06-22 14:28:53 +02:00
/ * *
* test if one set is totally included in another set
* @param small
* @param large
* @return true if the small set is completely included in the large set
* /
2010-11-28 03:57:31 +01:00
public static boolean totalInclusion ( final HandleSet small , final HandleSet large ) {
2010-06-22 14:28:53 +02:00
for ( byte [ ] handle : small ) {
if ( ! large . has ( handle ) ) return false ;
}
return true ;
2012-02-01 18:13:31 +01:00
}
2010-06-22 14:28:53 +02:00
/ * *
* test if the intersection of two sets is not empty
* @param < A >
* @param set1
* @param set2
* @return true if any element of the first set is part of the second set or vice - versa
* /
2010-11-28 03:57:31 +01:00
public static < A > boolean anymatch ( final SortedSet < A > set1 , final SortedSet < A > set2 ) {
2007-04-05 12:14:48 +02:00
// comparators must be equal
if ( ( set1 = = null ) | | ( set2 = = null ) ) return false ;
if ( set1 . comparator ( ) ! = set2 . comparator ( ) ) return false ;
2009-12-02 01:37:59 +01:00
if ( set1 . isEmpty ( ) | | set2 . isEmpty ( ) ) return false ;
2007-04-05 12:14:48 +02:00
// decide which method to use
2008-08-02 14:12:04 +02:00
final int high = ( ( set1 . size ( ) > set2 . size ( ) ) ? set1 . size ( ) : set2 . size ( ) ) ;
final int low = ( ( set1 . size ( ) > set2 . size ( ) ) ? set2 . size ( ) : set1 . size ( ) ) ;
final int stepsEnum = 10 * ( high + low - 1 ) ;
final int stepsTest = 12 * log2a ( high ) * low ;
2007-04-05 12:14:48 +02:00
// start most efficient method
if ( stepsEnum > stepsTest ) {
if ( set1 . size ( ) < set2 . size ( ) ) return anymatchByTest ( set1 , set2 ) ;
return anymatchByTest ( set2 , set1 ) ;
}
return anymatchByEnumeration ( set1 , set2 ) ;
}
2010-06-22 14:28:53 +02:00
/ * *
* test if the intersection of two sets is not empty
* @param set1
* @param set2
* @return true if any element of the first set is part of the second set or vice - versa
* /
2010-11-28 03:57:31 +01:00
public static boolean anymatch ( final HandleSet set1 , final HandleSet set2 ) {
2010-04-15 15:22:59 +02:00
// comparators must be equal
if ( ( set1 = = null ) | | ( set2 = = null ) ) return false ;
if ( set1 . comparator ( ) ! = set2 . comparator ( ) ) return false ;
if ( set1 . isEmpty ( ) | | set2 . isEmpty ( ) ) return false ;
// decide which method to use
final int high = ( ( set1 . size ( ) > set2 . size ( ) ) ? set1 . size ( ) : set2 . size ( ) ) ;
final int low = ( ( set1 . size ( ) > set2 . size ( ) ) ? set2 . size ( ) : set1 . size ( ) ) ;
final int stepsEnum = 10 * ( high + low - 1 ) ;
final int stepsTest = 12 * log2a ( high ) * low ;
// start most efficient method
if ( stepsEnum > stepsTest ) {
if ( set1 . size ( ) < set2 . size ( ) ) return anymatchByTest ( set1 , set2 ) ;
return anymatchByTest ( set2 , set1 ) ;
}
return anymatchByEnumeration ( set1 , set2 ) ;
}
2010-11-28 03:57:31 +01:00
private static < A > boolean anymatchByTest ( final SortedSet < A > small , final SortedSet < A > large ) {
2010-04-15 15:22:59 +02:00
final Iterator < A > mi = small . iterator ( ) ;
A o ;
while ( mi . hasNext ( ) ) {
o = mi . next ( ) ;
if ( large . contains ( o ) ) return true ;
}
return false ;
}
2010-11-28 03:57:31 +01:00
private static boolean anymatchByTest ( final HandleSet small , final HandleSet large ) {
2010-04-15 15:22:59 +02:00
final Iterator < byte [ ] > mi = small . iterator ( ) ;
byte [ ] o ;
while ( mi . hasNext ( ) ) {
o = mi . next ( ) ;
if ( large . has ( o ) ) return true ;
}
return false ;
}
2007-04-05 12:14:48 +02:00
2010-11-28 03:57:31 +01:00
private static < A > boolean anymatchByEnumeration ( final SortedSet < A > set1 , final SortedSet < A > set2 ) {
2010-04-15 15:22:59 +02:00
// implement pairwise enumeration
final Comparator < ? super A > comp = set1 . comparator ( ) ;
final Iterator < A > mi = set1 . iterator ( ) ;
final Iterator < A > si = set2 . iterator ( ) ;
int c ;
if ( ( mi . hasNext ( ) ) & & ( si . hasNext ( ) ) ) {
A mobj = mi . next ( ) ;
A sobj = si . next ( ) ;
while ( true ) {
c = comp . compare ( mobj , sobj ) ;
if ( c < 0 ) {
if ( mi . hasNext ( ) ) mobj = mi . next ( ) ; else break ;
} else if ( c > 0 ) {
if ( si . hasNext ( ) ) sobj = si . next ( ) ; else break ;
} else {
return true ;
}
}
}
return false ;
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
private static boolean anymatchByEnumeration ( final HandleSet set1 , final HandleSet set2 ) {
2010-04-15 15:22:59 +02:00
// implement pairwise enumeration
final Comparator < byte [ ] > comp = set1 . comparator ( ) ;
final Iterator < byte [ ] > mi = set1 . iterator ( ) ;
final Iterator < byte [ ] > si = set2 . iterator ( ) ;
int c ;
if ( ( mi . hasNext ( ) ) & & ( si . hasNext ( ) ) ) {
byte [ ] mobj = mi . next ( ) ;
byte [ ] sobj = si . next ( ) ;
while ( true ) {
c = comp . compare ( mobj , sobj ) ;
if ( c < 0 ) {
if ( mi . hasNext ( ) ) mobj = mi . next ( ) ; else break ;
} else if ( c > 0 ) {
if ( si . hasNext ( ) ) sobj = si . next ( ) ; else break ;
} else {
return true ;
}
}
}
return false ;
}
2012-02-01 18:13:31 +01:00
2005-04-07 21:19:42 +02:00
// ------------------------------------------------------------------------------------------------
// exclude
2009-08-28 15:28:11 +02:00
/ *
public static < A , B > TreeMap < A , B > excludeConstructive ( final TreeMap < A , B > map , final Set < A > set ) {
2005-04-07 21:19:42 +02:00
if ( map = = null ) return null ;
if ( set = = null ) return map ;
2009-12-02 01:37:59 +01:00
if ( map . isEmpty ( ) | | set . isEmpty ( ) ) return map ;
2009-08-28 15:28:11 +02:00
assert ! ( set instanceof TreeSet ) | | map . comparator ( ) = = ( ( TreeSet < A > ) set ) . comparator ( ) ;
// if (map.comparator() != set.comparator()) return excludeConstructiveByTestMapInSet(map, set);
2005-04-07 21:19:42 +02:00
return excludeConstructiveByTestMapInSet ( map , set ) ;
2006-03-16 23:46:32 +01:00
// return excludeConstructiveByEnumeration(map, set);
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2009-08-28 15:28:11 +02:00
private static < A , B > TreeMap < A , B > excludeConstructiveByTestMapInSet ( final TreeMap < A , B > map , final Set < A > set ) {
2008-08-02 14:12:04 +02:00
final TreeMap < A , B > result = new TreeMap < A , B > ( map . comparator ( ) ) ;
2008-01-11 01:12:01 +01:00
A o ;
2008-08-20 09:54:56 +02:00
for ( Entry < A , B > entry : map . entrySet ( ) ) {
o = entry . getKey ( ) ;
if ( ! ( set . contains ( o ) ) ) result . put ( o , entry . getValue ( ) ) ;
2006-03-16 23:46:32 +01:00
}
return result ;
2005-04-07 21:19:42 +02:00
}
2009-08-28 15:28:11 +02:00
* /
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static < A , B > void excludeDestructive ( final Map < A , B > map , final Set < A > set ) {
2005-04-07 21:19:42 +02:00
// comparators must be equal
if ( map = = null ) return ;
if ( set = = null ) return ;
2010-11-28 03:57:31 +01:00
assert ! ( map instanceof SortedMap < ? , ? > & & set instanceof SortedSet < ? > ) | | ( ( SortedMap < A , B > ) map ) . comparator ( ) = = ( ( SortedSet < A > ) set ) . comparator ( ) ;
2009-12-02 01:37:59 +01:00
if ( map . isEmpty ( ) | | set . isEmpty ( ) ) return ;
2006-03-16 23:46:32 +01:00
2005-04-07 21:19:42 +02:00
if ( map . size ( ) < set . size ( ) )
excludeDestructiveByTestMapInSet ( map , set ) ;
else
excludeDestructiveByTestSetInMap ( map , set ) ;
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
private static < A , B > void excludeDestructiveByTestMapInSet ( final Map < A , B > map , final Set < A > set ) {
2008-08-02 14:12:04 +02:00
final Iterator < A > mi = map . keySet ( ) . iterator ( ) ;
2006-03-16 23:46:32 +01:00
while ( mi . hasNext ( ) ) if ( set . contains ( mi . next ( ) ) ) mi . remove ( ) ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
private static < A , B > void excludeDestructiveByTestSetInMap ( final Map < A , B > map , final Set < A > set ) {
2008-08-02 14:12:04 +02:00
final Iterator < A > si = set . iterator ( ) ;
2006-03-16 23:46:32 +01:00
while ( si . hasNext ( ) ) map . remove ( si . next ( ) ) ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2005-04-07 21:19:42 +02:00
// and the same again with set-set
2010-11-28 03:57:31 +01:00
public static < A > void excludeDestructive ( final Set < A > set1 , final Set < A > set2 ) {
2005-04-07 21:19:42 +02:00
if ( set1 = = null ) return ;
if ( set2 = = null ) return ;
2010-11-28 03:57:31 +01:00
assert ! ( set1 instanceof SortedSet < ? > & & set2 instanceof SortedSet < ? > ) | | ( ( SortedSet < A > ) set1 ) . comparator ( ) = = ( ( SortedSet < A > ) set2 ) . comparator ( ) ;
2009-12-02 01:37:59 +01:00
if ( set1 . isEmpty ( ) | | set2 . isEmpty ( ) ) return ;
2012-02-01 18:13:31 +01:00
2005-04-07 21:19:42 +02:00
if ( set1 . size ( ) < set2 . size ( ) )
excludeDestructiveByTestSmallInLarge ( set1 , set2 ) ;
else
excludeDestructiveByTestLargeInSmall ( set1 , set2 ) ;
}
2012-02-01 18:13:31 +01:00
public static < A > void excludeDestructiveByTestSmallInLarge ( final Collection < A > small , final Set < A > large ) {
2008-08-02 14:12:04 +02:00
final Iterator < A > mi = small . iterator ( ) ;
2006-03-16 23:46:32 +01:00
while ( mi . hasNext ( ) ) if ( large . contains ( mi . next ( ) ) ) mi . remove ( ) ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
public static < A > void excludeDestructiveByTestLargeInSmall ( final Set < A > large , final Collection < A > small ) {
2008-08-02 14:12:04 +02:00
final Iterator < A > si = small . iterator ( ) ;
2006-03-16 23:46:32 +01:00
while ( si . hasNext ( ) ) large . remove ( si . next ( ) ) ;
2005-04-07 21:19:42 +02:00
}
2012-02-01 18:13:31 +01:00
2005-04-07 21:19:42 +02:00
// ------------------------------------------------------------------------------------------------
2010-11-28 03:57:31 +01:00
public static SortedMap < String , String > loadMap ( final String filename , final String sep ) {
final SortedMap < String , String > map = new TreeMap < String , String > ( ) ;
2005-07-12 17:09:35 +02:00
BufferedReader br = null ;
try {
br = new BufferedReader ( new InputStreamReader ( new FileInputStream ( filename ) ) ) ;
String line ;
int pos ;
while ( ( line = br . readLine ( ) ) ! = null ) {
line = line . trim ( ) ;
2012-07-10 22:59:03 +02:00
if ( ( ! line . isEmpty ( ) & & line . charAt ( 0 ) ! = '#' ) & & ( ( pos = line . indexOf ( sep ) ) > 0 ) )
2005-07-12 17:09:35 +02:00
map . put ( line . substring ( 0 , pos ) . trim ( ) . toLowerCase ( ) , line . substring ( pos + sep . length ( ) ) . trim ( ) ) ;
}
2012-02-01 18:13:31 +01:00
} catch ( final IOException e ) {
2005-07-12 17:09:35 +02:00
} finally {
2008-08-02 14:12:04 +02:00
if ( br ! = null ) try { br . close ( ) ; } catch ( final Exception e ) { }
2005-07-12 17:09:35 +02:00
}
return map ;
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static SortedMap < String , List < String > > loadMapMultiValsPerKey ( final String filename , final String sep ) {
final SortedMap < String , List < String > > map = new TreeMap < String , List < String > > ( ) ;
2007-02-24 14:56:32 +01:00
BufferedReader br = null ;
try {
br = new BufferedReader ( new InputStreamReader ( new FileInputStream ( filename ) ) ) ;
String line , key , value ;
int pos ;
while ( ( line = br . readLine ( ) ) ! = null ) {
line = line . trim ( ) ;
2012-07-10 22:59:03 +02:00
if ( ( ! line . isEmpty ( ) & & line . charAt ( 0 ) ! = '#' ) & & ( ( pos = line . indexOf ( sep ) ) > 0 ) ) {
2007-02-24 14:56:32 +01:00
key = line . substring ( 0 , pos ) . trim ( ) . toLowerCase ( ) ;
value = line . substring ( pos + sep . length ( ) ) . trim ( ) ;
2008-01-11 01:12:01 +01:00
if ( ! map . containsKey ( key ) ) map . put ( key , new ArrayList < String > ( ) ) ;
map . get ( key ) . add ( value ) ;
2007-02-24 14:56:32 +01:00
}
}
2012-02-01 18:13:31 +01:00
} catch ( final IOException e ) {
2007-02-24 14:56:32 +01:00
} finally {
2008-08-02 14:12:04 +02:00
if ( br ! = null ) try { br . close ( ) ; } catch ( final Exception e ) { }
2007-02-24 14:56:32 +01:00
}
return map ;
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static SortedSet < String > loadList ( final File file , final Comparator < String > c ) {
final SortedSet < String > list = new TreeSet < String > ( c ) ;
2005-07-12 17:09:35 +02:00
if ( ! ( file . exists ( ) ) ) return list ;
2012-02-01 18:13:31 +01:00
2005-07-12 17:09:35 +02:00
BufferedReader br = null ;
try {
br = new BufferedReader ( new InputStreamReader ( new FileInputStream ( file ) ) ) ;
String line ;
while ( ( line = br . readLine ( ) ) ! = null ) {
line = line . trim ( ) ;
2012-07-10 22:59:03 +02:00
if ( ! line . isEmpty ( ) & & line . charAt ( 0 ) ! = '#' ) list . add ( line . trim ( ) . toLowerCase ( ) ) ;
2005-07-12 17:09:35 +02:00
}
br . close ( ) ;
2012-02-01 18:13:31 +01:00
} catch ( final IOException e ) {
2005-07-12 17:09:35 +02:00
} finally {
2008-08-02 14:12:04 +02:00
if ( br ! = null ) try { br . close ( ) ; } catch ( final Exception e ) { }
2005-07-12 17:09:35 +02:00
}
return list ;
}
2009-05-10 12:54:06 +02:00
2010-11-28 03:57:31 +01:00
public static String setToString ( final HandleSet set , final char separator ) {
2009-04-16 17:29:00 +02:00
final Iterator < byte [ ] > i = set . iterator ( ) ;
final StringBuilder sb = new StringBuilder ( set . size ( ) * 7 ) ;
2011-03-07 21:36:40 +01:00
if ( i . hasNext ( ) ) sb . append ( UTF8 . String ( i . next ( ) ) ) ;
2009-04-16 17:29:00 +02:00
while ( i . hasNext ( ) ) {
2011-03-07 21:36:40 +01:00
sb . append ( separator ) . append ( UTF8 . String ( i . next ( ) ) ) ;
2009-04-16 17:29:00 +02:00
}
2009-05-10 12:54:06 +02:00
return sb . toString ( ) ;
2009-04-16 17:29:00 +02:00
}
2012-02-01 18:13:31 +01:00
2010-11-28 03:57:31 +01:00
public static String setToString ( final Set < String > set , final char separator ) {
2008-08-02 14:12:04 +02:00
final Iterator < String > i = set . iterator ( ) ;
2008-12-04 13:54:16 +01:00
final StringBuilder sb = new StringBuilder ( set . size ( ) * 7 ) ;
2008-01-11 01:12:01 +01:00
if ( i . hasNext ( ) ) sb . append ( i . next ( ) ) ;
2007-03-13 23:18:36 +01:00
while ( i . hasNext ( ) ) {
2008-01-11 01:12:01 +01:00
sb . append ( separator ) . append ( i . next ( ) ) ;
2007-03-13 23:18:36 +01:00
}
2009-05-10 12:54:06 +02:00
return sb . toString ( ) ;
2007-03-13 23:18:36 +01:00
}
2009-05-10 12:54:06 +02:00
2005-07-12 17:09:35 +02:00
// ------------------------------------------------------------------------------------------------
2012-02-01 18:13:31 +01:00
2008-08-02 14:12:04 +02:00
public static void main ( final String [ ] args ) {
2010-11-28 03:57:31 +01:00
final SortedMap < String , String > m = new TreeMap < String , String > ( ) ;
final SortedMap < String , String > s = new TreeMap < String , String > ( ) ;
2005-04-07 21:19:42 +02:00
m . put ( " a " , " a " ) ;
m . put ( " x " , " x " ) ;
m . put ( " f " , " f " ) ;
m . put ( " h " , " h " ) ;
m . put ( " w " , " w " ) ;
m . put ( " 7 " , " 7 " ) ;
m . put ( " t " , " t " ) ;
m . put ( " k " , " k " ) ;
m . put ( " y " , " y " ) ;
m . put ( " z " , " z " ) ;
2006-09-13 19:13:28 +02:00
s . put ( " a " , " a " ) ;
s . put ( " b " , " b " ) ;
s . put ( " c " , " c " ) ;
s . put ( " k " , " k " ) ;
s . put ( " l " , " l " ) ;
s . put ( " m " , " m " ) ;
s . put ( " n " , " n " ) ;
s . put ( " o " , " o " ) ;
s . put ( " p " , " p " ) ;
s . put ( " q " , " q " ) ;
s . put ( " r " , " r " ) ;
s . put ( " s " , " s " ) ;
s . put ( " t " , " t " ) ;
s . put ( " x " , " x " ) ;
2005-04-07 21:19:42 +02:00
System . out . println ( " Compare " + m . toString ( ) + " with " + s . toString ( ) ) ;
2006-09-13 19:13:28 +02:00
System . out . println ( " Join= " + joinConstructiveByEnumeration ( m , s , true ) ) ;
System . out . println ( " Join= " + joinConstructiveByTest ( m , s , true ) ) ;
System . out . println ( " Join= " + joinConstructiveByTest ( m , s , true ) ) ;
System . out . println ( " Join= " + joinConstructive ( m , s , true ) ) ;
2008-01-11 01:12:01 +01:00
//System.out.println("Exclude=" + excludeConstructiveByTestMapInSet(m, s.keySet()));
2005-04-07 21:19:42 +02:00
/ *
for ( int low = 0 ; low < 10 ; low + + )
for ( int high = 0 ; high < 100 ; high = high + 10 ) {
int stepsEnum = 10 * high ;
int stepsTest = 12 * log2 ( high ) * low ;
System . out . println ( " low= " + low + " , high= " + high + " , stepsEnum= " + stepsEnum + " , stepsTest= " + stepsTest + " ; best method is " + ( ( stepsEnum < stepsTest ) ? " joinByEnumeration " : " joinByTest " ) ) ;
}
* /
}
}