// kelondroMSetTools.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 28.12.2004
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package de.anomic.kelondro.util ;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Static helpers for set algebra on sorted collections ({@link TreeMap} and {@link TreeSet}):
 * joins (intersections by key), "any match" tests and exclusions, plus a few small
 * line-oriented file loaders.
 *
 * <p>Two strategies are used for joins and match tests:
 * <ul>
 *   <li><b>by test</b>: iterate the smaller collection and look every key up in the larger one;</li>
 *   <li><b>by enumeration</b>: walk both collections in sorted order in parallel.</li>
 * </ul>
 * The public entry points pick the strategy from an estimated step count.
 * Both strategies work with explicit comparators as well as with natural ordering
 * (where {@code comparator()} returns {@code null}).
 */
public class SetTools {

    // ------------------------------------------------------------------------------------------------
    // helper methods

    /**
     * Returns the number of significant bits of {@code x}, i.e. {@code 1 + floor(log2(x))}
     * for positive {@code x}. This is a bit count, not the base-2 logarithm.
     *
     * @param x the value to measure
     * @return the bit length of {@code x}; 0 for {@code x <= 0}
     */
    public static int log2a(int x) {
        int l = 0;
        while (x > 0) {
            x = x >>> 1;
            l++;
        }
        return l;
    }

    /**
     * Decides whether the "by test" strategy is estimated to be cheaper than pairwise enumeration
     * for two sorted collections of the given sizes. Computed in {@code long} so that very large
     * collections cannot overflow the estimate.
     */
    private static boolean testIsCheaper(final int size1, final int size2) {
        final long high = Math.max(size1, size2);
        final long low = Math.min(size1, size2);
        final long stepsEnum = 10L * (high + low - 1);
        final long stepsTest = 12L * log2a((int) high) * low;
        return stepsEnum > stepsTest;
    }

    /**
     * Tells whether two comparators (either may be {@code null}, meaning natural ordering)
     * can be treated as the same ordering: same instance, or equal according to {@code equals}.
     */
    private static boolean sameComparator(final Comparator<?> c1, final Comparator<?> c2) {
        return (c1 == c2) || ((c1 != null) && c1.equals(c2));
    }

    /**
     * Compares two keys with the given comparator, falling back to the keys' natural ordering
     * when the comparator is {@code null} (which is what sorted collections report when they
     * were built without a comparator).
     */
    @SuppressWarnings("unchecked")
    private static <A> int compare(final Comparator<? super A> comp, final A a, final A b) {
        if (comp != null) return comp.compare(a, b);
        return ((Comparable<? super A>) a).compareTo(b);
    }

    /**
     * Computes the value stored in a join result for a key present in both maps:
     * the concatenation of both values if requested and both are strings, otherwise the first value.
     */
    @SuppressWarnings("unchecked")
    private static <B> B joinValue(final B v1, final B v2, final boolean concatStrings) {
        if (concatStrings && (v1 instanceof String) && (v2 instanceof String)) {
            return (B) ((String) v1 + (String) v2);
        }
        return v1;
    }

    // ------------------------------------------------------------------------------------------------
    // join
    // We distinguish two principal solutions
    // - constructive join (generate new data structure)
    // - destructive join (remove non-valid elements from given data structure)
    // The algorithm to perform the join can be also of two kinds:
    // - join by pairwise enumeration
    // - join by iterative tests (where we distinguish left-right and right-left tests)

    /**
     * Joins (intersects by key) all maps of a collection, starting with the smallest map.
     *
     * @param maps          the maps to join; {@code null} is treated like an empty collection
     * @param concatStrings if true, string values of matching keys are concatenated
     *                      (in order of ascending map size)
     * @return the join of all maps; a new empty map if the collection is empty or contains a
     *         {@code null} or empty map. If the collection holds exactly one map, that very
     *         instance is returned.
     */
    public static <A, B> TreeMap<A, B> joinConstructive(final Collection<TreeMap<A, B>> maps, final boolean concatStrings) {
        if (maps == null) return new TreeMap<A, B>();

        // order the maps by their size; the position in the collection is packed into the low
        // 32 bits so that every map gets a unique key and equally sized maps keep their order
        final TreeMap<Long, TreeMap<A, B>> orderMap = new TreeMap<Long, TreeMap<A, B>>();
        int count = 0;
        for (final TreeMap<A, B> singleMap : maps) {
            // an empty operand makes the whole join empty
            if ((singleMap == null) || singleMap.isEmpty()) return new TreeMap<A, B>();
            orderMap.put(Long.valueOf((((long) singleMap.size()) << 32) | count), singleMap);
            count++;
        }

        // check if there is anything to join
        if (orderMap.isEmpty()) return new TreeMap<A, B>();

        // pairwise build up the conjunction, always combining with the next smallest map
        TreeMap<A, B> joinResult = orderMap.remove(orderMap.firstKey());
        while (!orderMap.isEmpty() && !joinResult.isEmpty()) {
            joinResult = joinConstructiveByTest(joinResult, orderMap.remove(orderMap.firstKey()), concatStrings);
        }

        if (joinResult.isEmpty()) return new TreeMap<A, B>();
        return joinResult;
    }

    /**
     * Joins (intersects by key) two maps into a new map.
     *
     * <p>Note: with {@code concatStrings}, the enumeration strategy concatenates
     * {@code map1}'s value before {@code map2}'s, while the test strategy puts the value
     * of the smaller map first.
     *
     * @param map1          first map
     * @param map2          second map
     * @param concatStrings if true, string values of matching keys are concatenated
     * @return a new map with all keys contained in both maps, or {@code null} if one of the maps
     *         is {@code null} or the maps do not use the same comparator
     */
    public static <A, B> TreeMap<A, B> joinConstructive(final TreeMap<A, B> map1, final TreeMap<A, B> map2, final boolean concatStrings) {
        // comparators must be equal
        if ((map1 == null) || (map2 == null)) return null;
        if (!sameComparator(map1.comparator(), map2.comparator())) return null;
        if (map1.isEmpty() || map2.isEmpty()) return new TreeMap<A, B>(map1.comparator());

        // start most efficient method
        if (testIsCheaper(map1.size(), map2.size())) {
            if (map1.size() > map2.size()) return joinConstructiveByTest(map2, map1, concatStrings);
            return joinConstructiveByTest(map1, map2, concatStrings);
        }
        return joinConstructiveByEnumeration(map1, map2, concatStrings);
    }

    /**
     * Join by test: every key of {@code small} is looked up in {@code large}.
     * The result carries the values of {@code small} (optionally concatenated with those of {@code large}).
     */
    private static <A, B> TreeMap<A, B> joinConstructiveByTest(final TreeMap<A, B> small, final TreeMap<A, B> large, final boolean concatStrings) {
        final TreeMap<A, B> result = new TreeMap<A, B>(large.comparator());
        for (final Map.Entry<A, B> entry : small.entrySet()) {
            final A key = entry.getKey();
            final B other = large.get(key);
            // containsKey is only consulted for null values, so that a key mapped to null
            // is joined exactly as the enumeration strategy does
            if ((other != null) || large.containsKey(key)) {
                result.put(key, joinValue(entry.getValue(), other, concatStrings));
            }
        }
        return result;
    }

    /**
     * Join by pairwise enumeration: both maps are walked in key order in parallel.
     * The result carries the values of {@code map1} (optionally concatenated with those of {@code map2}).
     */
    private static <A, B> TreeMap<A, B> joinConstructiveByEnumeration(final TreeMap<A, B> map1, final TreeMap<A, B> map2, final boolean concatStrings) {
        final Comparator<? super A> comp = map1.comparator(); // null means natural ordering
        final Iterator<Map.Entry<A, B>> mi1 = map1.entrySet().iterator();
        final Iterator<Map.Entry<A, B>> mi2 = map2.entrySet().iterator();
        final TreeMap<A, B> result = new TreeMap<A, B>(comp);
        if (!mi1.hasNext() || !mi2.hasNext()) return result;

        Map.Entry<A, B> mentry1 = mi1.next();
        Map.Entry<A, B> mentry2 = mi2.next();
        while (true) {
            final int c = compare(comp, mentry1.getKey(), mentry2.getKey());
            if (c < 0) {
                if (!mi1.hasNext()) break;
                mentry1 = mi1.next();
            } else if (c > 0) {
                if (!mi2.hasNext()) break;
                mentry2 = mi2.next();
            } else {
                result.put(mentry1.getKey(), joinValue(mentry1.getValue(), mentry2.getValue(), concatStrings));
                // a further match needs another element on both sides
                if (!mi1.hasNext() || !mi2.hasNext()) break;
                mentry1 = mi1.next();
                mentry2 = mi2.next();
            }
        }
        return result;
    }

    // now the same for set-set

    /**
     * Intersects two sets into a new set.
     *
     * @param set1 first set
     * @param set2 second set
     * @return a new set with all elements contained in both sets, or {@code null} if one of the
     *         sets is {@code null} or the sets do not use the same comparator
     */
    public static <A> TreeSet<A> joinConstructive(final TreeSet<A> set1, final TreeSet<A> set2) {
        // comparators must be equal
        if ((set1 == null) || (set2 == null)) return null;
        if (!sameComparator(set1.comparator(), set2.comparator())) return null;
        if (set1.isEmpty() || set2.isEmpty()) return new TreeSet<A>(set1.comparator());

        // start most efficient method
        if (testIsCheaper(set1.size(), set2.size())) {
            if (set1.size() < set2.size()) return joinConstructiveByTest(set1, set2);
            return joinConstructiveByTest(set2, set1);
        }
        return joinConstructiveByEnumeration(set1, set2);
    }

    /** Intersection by test: every element of {@code small} is looked up in {@code large}. */
    private static <A> TreeSet<A> joinConstructiveByTest(final TreeSet<A> small, final TreeSet<A> large) {
        final TreeSet<A> result = new TreeSet<A>(small.comparator());
        for (final A o : small) {
            if (large.contains(o)) result.add(o);
        }
        return result;
    }

    /** Intersection by pairwise enumeration: both sets are walked in sorted order in parallel. */
    private static <A> TreeSet<A> joinConstructiveByEnumeration(final TreeSet<A> set1, final TreeSet<A> set2) {
        final Comparator<? super A> comp = set1.comparator(); // null means natural ordering
        final Iterator<A> mi = set1.iterator();
        final Iterator<A> si = set2.iterator();
        final TreeSet<A> result = new TreeSet<A>(comp);
        if (!mi.hasNext() || !si.hasNext()) return result;

        A mobj = mi.next();
        A sobj = si.next();
        while (true) {
            final int c = compare(comp, mobj, sobj);
            if (c < 0) {
                if (!mi.hasNext()) break;
                mobj = mi.next();
            } else if (c > 0) {
                if (!si.hasNext()) break;
                sobj = si.next();
            } else {
                result.add(mobj);
                // a further match needs another element on both sides
                if (!mi.hasNext() || !si.hasNext()) break;
                mobj = mi.next();
                sobj = si.next();
            }
        }
        return result;
    }

    /**
     * Tests whether two sets have at least one element in common.
     *
     * @param set1 first set
     * @param set2 second set
     * @return true if the sets share an element; false if they do not, if one of them is
     *         {@code null} or empty, or if they do not use the same comparator
     */
    public static <A> boolean anymatch(final TreeSet<A> set1, final TreeSet<A> set2) {
        // comparators must be equal
        if ((set1 == null) || (set2 == null)) return false;
        if (!sameComparator(set1.comparator(), set2.comparator())) return false;
        if (set1.isEmpty() || set2.isEmpty()) return false;

        // start most efficient method
        if (testIsCheaper(set1.size(), set2.size())) {
            if (set1.size() < set2.size()) return anymatchByTest(set1, set2);
            return anymatchByTest(set2, set1);
        }
        return anymatchByEnumeration(set1, set2);
    }

    /** Match test by lookup: stops at the first element of {@code small} found in {@code large}. */
    private static <A> boolean anymatchByTest(final TreeSet<A> small, final TreeSet<A> large) {
        for (final A o : small) {
            if (large.contains(o)) return true;
        }
        return false;
    }

    /** Match test by pairwise enumeration: stops at the first common element. */
    private static <A> boolean anymatchByEnumeration(final TreeSet<A> set1, final TreeSet<A> set2) {
        final Comparator<? super A> comp = set1.comparator(); // null means natural ordering
        final Iterator<A> mi = set1.iterator();
        final Iterator<A> si = set2.iterator();
        if (!mi.hasNext() || !si.hasNext()) return false;

        A mobj = mi.next();
        A sobj = si.next();
        while (true) {
            final int c = compare(comp, mobj, sobj);
            if (c < 0) {
                if (!mi.hasNext()) break;
                mobj = mi.next();
            } else if (c > 0) {
                if (!si.hasNext()) break;
                sobj = si.next();
            } else {
                return true;
            }
        }
        return false;
    }

    // ------------------------------------------------------------------------------------------------
    // exclude

    /**
     * Builds a map that contains all entries of {@code map} whose key is not in {@code set}.
     * The comparators of map and set are not required to match.
     *
     * @param map the source map, left unmodified
     * @param set the keys to exclude
     * @return {@code null} if {@code map} is {@code null}; {@code map} itself (not a copy) if
     *         there is nothing to exclude because {@code set} is {@code null} or one of both is
     *         empty; otherwise a new map using the comparator of {@code map}
     */
    public static <A, B> TreeMap<A, B> excludeConstructive(final TreeMap<A, B> map, final TreeSet<A> set) {
        if (map == null) return null;
        if ((set == null) || map.isEmpty() || set.isEmpty()) return map;
        return excludeConstructiveByTestMapInSet(map, set);
    }

    /** Copies every entry of {@code map} whose key is not contained in {@code set} into a new map. */
    private static <A, B> TreeMap<A, B> excludeConstructiveByTestMapInSet(final TreeMap<A, B> map, final TreeSet<A> set) {
        final TreeMap<A, B> result = new TreeMap<A, B>(map.comparator());
        for (final Entry<A, B> entry : map.entrySet()) {
            final A o = entry.getKey();
            if (!set.contains(o)) result.put(o, entry.getValue());
        }
        return result;
    }

    /**
     * Removes from {@code map} all entries whose key is contained in {@code set}.
     * Does nothing if one argument is {@code null} or empty, or if map and set do not use
     * the same comparator.
     *
     * @param map the map to modify in place
     * @param set the keys to remove
     */
    public static <A, B> void excludeDestructive(final TreeMap<A, B> map, final TreeSet<A> set) {
        // comparators must be equal
        if ((map == null) || (set == null)) return;
        if (!sameComparator(map.comparator(), set.comparator())) return;
        if (map.isEmpty() || set.isEmpty()) return;

        // iterate over the smaller collection
        if (map.size() < set.size()) {
            excludeDestructiveByTestMapInSet(map, set);
        } else {
            excludeDestructiveByTestSetInMap(map, set);
        }
    }

    /** Walks the map and removes every key that is found in the set. */
    private static <A, B> void excludeDestructiveByTestMapInSet(final TreeMap<A, B> map, final TreeSet<A> set) {
        final Iterator<A> mi = map.keySet().iterator();
        while (mi.hasNext()) {
            if (set.contains(mi.next())) mi.remove();
        }
    }

    /** Walks the set and removes every element from the map. */
    private static <A, B> void excludeDestructiveByTestSetInMap(final TreeMap<A, B> map, final TreeSet<A> set) {
        for (final A o : set) {
            map.remove(o);
        }
    }

    // and the same again with set-set

    /**
     * Removes from {@code set1} all elements that are contained in {@code set2}.
     * Does nothing if one argument is {@code null} or empty, or if the sets do not use
     * the same comparator.
     *
     * @param set1 the set to modify in place
     * @param set2 the elements to remove
     */
    public static <A> void excludeDestructive(final TreeSet<A> set1, final TreeSet<A> set2) {
        // comparators must be equal
        if ((set1 == null) || (set2 == null)) return;
        if (!sameComparator(set1.comparator(), set2.comparator())) return;
        if (set1.isEmpty() || set2.isEmpty()) return;

        // iterate over the smaller set
        if (set1.size() < set2.size()) {
            excludeDestructiveByTestSmallInLarge(set1, set2);
        } else {
            excludeDestructiveByTestLargeInSmall(set1, set2);
        }
    }

    /** Walks {@code small} and removes from it every element that is found in {@code large}. */
    private static <A> void excludeDestructiveByTestSmallInLarge(final TreeSet<A> small, final TreeSet<A> large) {
        final Iterator<A> mi = small.iterator();
        while (mi.hasNext()) {
            if (large.contains(mi.next())) mi.remove();
        }
    }

    /** Walks {@code small} and removes each of its elements from {@code large}. */
    private static <A> void excludeDestructiveByTestLargeInSmall(final TreeSet<A> large, final TreeSet<A> small) {
        for (final A o : small) {
            large.remove(o);
        }
    }

    // ------------------------------------------------------------------------------------------------
    // loaders

    /**
     * Loads a key/value file into a map. Each line is trimmed; empty lines, lines starting with
     * {@code #} and lines without a separator (or starting with it) are skipped. Keys are trimmed
     * and lower-cased independently of the default locale, values are trimmed. A later line
     * with the same key overwrites an earlier one.
     *
     * @param filename path of the file to read (platform default charset)
     * @param sep      separator between key and value
     * @return the loaded map; reading is best effort, so a missing or unreadable file yields
     *         whatever could be read so far (possibly an empty map)
     */
    public static TreeMap<String, String> loadMap(final String filename, final String sep) {
        final TreeMap<String, String> map = new TreeMap<String, String>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
            String line;
            int pos;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if ((line.length() > 0) && (!line.startsWith("#")) && ((pos = line.indexOf(sep)) > 0)) {
                    map.put(line.substring(0, pos).trim().toLowerCase(Locale.ENGLISH), line.substring(pos + sep.length()).trim());
                }
            }
        } catch (final IOException e) {
            // deliberately ignored: loading is best effort, the caller gets what was read so far
        } finally {
            if (br != null) try { br.close(); } catch (final Exception e) { /* nothing left to do */ }
        }
        return map;
    }

    /**
     * Like {@link #loadMap(String, String)}, but collects all values of a repeated key in a list
     * (in file order) instead of overwriting.
     *
     * @param filename path of the file to read (platform default charset)
     * @param sep      separator between key and value
     * @return the loaded map; reading is best effort, so a missing or unreadable file yields
     *         whatever could be read so far (possibly an empty map)
     */
    public static TreeMap<String, ArrayList<String>> loadMapMultiValsPerKey(final String filename, final String sep) {
        final TreeMap<String, ArrayList<String>> map = new TreeMap<String, ArrayList<String>>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
            String line;
            int pos;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if ((line.length() > 0) && (!line.startsWith("#")) && ((pos = line.indexOf(sep)) > 0)) {
                    final String key = line.substring(0, pos).trim().toLowerCase(Locale.ENGLISH);
                    final String value = line.substring(pos + sep.length()).trim();
                    ArrayList<String> values = map.get(key);
                    if (values == null) {
                        values = new ArrayList<String>();
                        map.put(key, values);
                    }
                    values.add(value);
                }
            }
        } catch (final IOException e) {
            // deliberately ignored: loading is best effort, the caller gets what was read so far
        } finally {
            if (br != null) try { br.close(); } catch (final Exception e) { /* nothing left to do */ }
        }
        return map;
    }

    /**
     * Loads the lines of a file into a sorted set. Each line is trimmed and lower-cased
     * (independently of the default locale); empty lines and lines starting with {@code #}
     * are skipped.
     *
     * @param file the file to read (platform default charset)
     * @param c    the comparator for the resulting set, {@code null} for natural ordering
     * @return the loaded set; empty if the file does not exist. Reading is best effort, so an
     *         unreadable file yields whatever could be read so far.
     */
    public static TreeSet<String> loadList(final File file, final Comparator<String> c) {
        final TreeSet<String> list = new TreeSet<String>(c);
        if (!file.exists()) return list;

        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if ((line.length() > 0) && (!line.startsWith("#"))) list.add(line.toLowerCase(Locale.ENGLISH));
            }
        } catch (final IOException e) {
            // deliberately ignored: loading is best effort, the caller gets what was read so far
        } finally {
            if (br != null) try { br.close(); } catch (final Exception e) { /* nothing left to do */ }
        }
        return list;
    }

    /**
     * Concatenates the elements of a set in iteration order, separated by {@code separator}.
     *
     * @param set       the elements to concatenate, must not be {@code null}
     * @param separator the character placed between two elements
     * @return the concatenation; the empty string for an empty set
     */
    public static String setToString(final Set<String> set, final char separator) {
        final StringBuilder sb = new StringBuilder(set.size() * 7);
        final Iterator<String> i = set.iterator();
        if (i.hasNext()) sb.append(i.next());
        while (i.hasNext()) {
            sb.append(separator).append(i.next());
        }
        return sb.toString();
    }

    // ------------------------------------------------------------------------------------------------

    /**
     * Small demonstration: joins two sample maps with every strategy and prints the results.
     *
     * @param args unused
     */
    public static void main(final String[] args) {
        final TreeMap<String, String> m = new TreeMap<String, String>();
        final TreeMap<String, String> s = new TreeMap<String, String>();
        m.put("a", "a");
        m.put("x", "x");
        m.put("f", "f");
        m.put("h", "h");
        m.put("w", "w");
        m.put("7", "7");
        m.put("t", "t");
        m.put("k", "k");
        m.put("y", "y");
        m.put("z", "z");
        s.put("a", "a");
        s.put("b", "b");
        s.put("c", "c");
        s.put("k", "k");
        s.put("l", "l");
        s.put("m", "m");
        s.put("n", "n");
        s.put("o", "o");
        s.put("p", "p");
        s.put("q", "q");
        s.put("r", "r");
        s.put("s", "s");
        s.put("t", "t");
        s.put("x", "x");
        System.out.println("Compare " + m.toString() + " with " + s.toString());
        System.out.println("Join=" + joinConstructiveByEnumeration(m, s, true));
        System.out.println("Join=" + joinConstructiveByTest(m, s, true));
        System.out.println("Join=" + joinConstructiveByTest(m, s, true));
        System.out.println("Join=" + joinConstructive(m, s, true));
    }
}