fix for concurrency problem and endless loop in /suggest.json

This commit is contained in:
Michael Christen 2012-01-05 08:35:44 +01:00
parent c21966bb43
commit 354b976110

View File

@ -48,14 +48,17 @@ public class DidYouMean {
private static final char[] ALPHABET_KANJI = new char[512];
static {
// this is very experimental: a very small subset of Kanji
for (char a = '\u3400'; a <= '\u34ff'; a++) ALPHABET_KANJI[0xff & (a - '\u3400')] = a;
for (char a = '\u4e00'; a <= '\u4eff'; a++) ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a;
for (char a = '\u3400'; a <= '\u34ff'; a++) {
ALPHABET_KANJI[0xff & (a - '\u3400')] = a;
}
for (char a = '\u4e00'; a <= '\u4eff'; a++) {
ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a;
}
}
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static char[] alphabet = ALPHABET_LATIN;
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator();
private final IndexCell<WordReference> index;
@ -66,6 +69,7 @@ public class DidYouMean {
private boolean createGen; // keeps the value 'true' as long as no entry in guessLib is written
private final SortedSet<StringBuilder> resultSet;
private final indexSizeComparator INDEX_SIZE_COMPARATOR;
private char[] alphabet;
/**
@ -88,25 +92,31 @@ public class DidYouMean {
boolean alphafound = false;
alphatest: for (final char[] alpha: ALPHABETS) {
if (isAlphabet(alpha, testchar)) {
alphabet = alpha;
this.alphabet = new char[alpha.length];
System.arraycopy(ALPHABET_LATIN, 0, this.alphabet, 0, alpha.length);
alphafound = true;
break alphatest;
}
}
if (!alphafound) {
// generate generic alphabet using simply a character block of 256 characters
final char firstchar = (char) ((0xff & (testchar / 256)) * 256);
final char lastchar = (char) (firstchar + 255);
alphabet = new char[256];
for (char a = firstchar; a <= lastchar; a++) {
alphabet[0xff & (a - firstchar)] = a;
final int firstchar = (0xff & (testchar / 256)) * 256;
final int lastchar = firstchar + 255;
this.alphabet = new char[256];
// test this with /suggest.json?q=%EF%BD%84
for (int a = firstchar; a <= lastchar; a++) {
this.alphabet[0xff & (a - firstchar)] = (char) a;
}
}
}
}
private static final boolean isAlphabet(final char[] alpha, final char testchar) {
for (final char a: alpha) if (a == testchar) return true;
for (final char a: alpha) {
if (a == testchar) {
return true;
}
}
return false;
}
@ -125,10 +135,15 @@ public class DidYouMean {
* @return
*/
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
if (this.word.length() < MinimumInputWordLength) return this.resultSet; // return nothing if input is too short
if (this.word.length() < MinimumInputWordLength)
{
return this.resultSet; // return nothing if input is too short
}
final long startTime = System.currentTimeMillis();
final long timelimit = startTime + timeout;
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) {
return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
}
final SortedSet<StringBuilder> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) {
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
@ -138,8 +153,12 @@ public class DidYouMean {
final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
for (final StringBuilder s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (!(scored.sizeSmaller(2 * preSortSelection))) break;
if (System.currentTimeMillis() > timelimit) {
break;
}
if (!(scored.sizeSmaller(2 * preSortSelection))) {
break;
}
scored.inc(s, this.index.count(Word.word2hash(s)));
}
final SortedSet<StringBuilder> countSorted = Collections.synchronizedSortedSet(new TreeSet<StringBuilder>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
@ -147,8 +166,12 @@ public class DidYouMean {
while (!scored.isEmpty() && countSorted.size() < preSortSelection) {
final StringBuilder s = scored.getMaxKey();
final int score = scored.delete(s);
if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s);
if (System.currentTimeMillis() > timelimit) break;
if (s.length() >= MinimumOutputWordLength && score > wc) {
countSorted.add(s);
}
if (System.currentTimeMillis() > timelimit) {
break;
}
}
// finished
@ -180,11 +203,19 @@ public class DidYouMean {
final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
StringBuilder sb;
for (int i = 0; i < words.length; i++) {
if (s[i].isEmpty()) continue;
if (s[i].isEmpty()) {
continue;
}
sb = new StringBuilder(20);
for (int j = 0; j < words.length; j++) {
if (j > 0) sb.append(' ');
if (i == j) sb.append(s[j].first()); else sb.append(words[j]);
if (j > 0) {
sb.append(' ');
}
if (i == j) {
sb.append(s[j].first());
} else {
sb.append(words[j]);
}
}
result.add(sb);
}
@ -211,10 +242,12 @@ public class DidYouMean {
// get a single recommendation for the word without altering the word
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(this.word);
for (final StringBuilder t: libr) {
if (!t.equals(this.word)) try {
this.createGen = false;
this.guessLib.put(t);
} catch (final InterruptedException e) {}
if (!t.equals(this.word)) {
try {
this.createGen = false;
this.guessLib.put(t);
} catch (final InterruptedException e) {}
}
}
// create and start producers
@ -226,34 +259,46 @@ public class DidYouMean {
producers[1] = new AddingOneLetter();
producers[2] = new DeletingOneLetter();
producers[3] = new ReversingTwoConsecutiveLetters();
for (final Thread t: producers) t.start();
for (final Thread t: producers) {
t.start();
}
// start more consumers if there are more cores
if (consumers.length > 1) for (int i = 1; i < consumers.length; i++) {
consumers[i] = new Consumer();
consumers[i].start();
if (consumers.length > 1) {
for (int i = 1; i < consumers.length; i++) {
consumers[i] = new Consumer();
consumers[i].start();
}
}
// now decide which kind of guess is better
// we take guessLib entries as long as there is any entry in it
// to see if this is the case, we must wait for termination of the producer
for (final Thread t: producers) try { t.join(); } catch (final InterruptedException e) {}
for (final Thread t: producers) {
try { t.join(); } catch (final InterruptedException e) {}
}
// if there is not any entry in guessLib, then transfer all entries from the
// guessGen to guessLib
if (this.createGen) try {
this.guessGen.put(POISON_STRING);
StringBuilder s;
while (!(s = this.guessGen.take()).equals(POISON_STRING)) this.guessLib.put(s);
} catch (final InterruptedException e) {}
if (this.createGen) {
try {
this.guessGen.put(POISON_STRING);
StringBuilder s;
while (!(s = this.guessGen.take()).equals(POISON_STRING)) {
this.guessLib.put(s);
}
} catch (final InterruptedException e) {}
}
// put poison into guessLib to terminate consumers
for (@SuppressWarnings("unused") final Consumer c: consumers)
for (@SuppressWarnings("unused") final Consumer c: consumers) {
try { this.guessLib.put(POISON_STRING); } catch (final InterruptedException e) {}
}
// wait for termination of consumer
for (final Consumer c: consumers)
for (final Consumer c: consumers) {
try { c.join(); } catch (final InterruptedException e) {}
}
// we don't want the given word in the result
this.resultSet.remove(this.word);
@ -265,7 +310,9 @@ public class DidYouMean {
private void test(final StringBuilder s) throws InterruptedException {
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(s);
libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) this.createGen = false;
if (!libr.isEmpty()) {
this.createGen = false;
}
for (final StringBuilder t: libr) {
this.guessLib.put(t);
}
@ -284,16 +331,20 @@ public class DidYouMean {
@Override
public void run() {
char m;
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
m = DidYouMean.this.word.charAt(i);
for (final char c: alphabet) {
if (m != c) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
test(ts);
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
try {
m = DidYouMean.this.word.charAt(i);
for (final char c: DidYouMean.this.alphabet) {
if (m != c) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
test(ts);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
}
} catch (final InterruptedException e) {}
} catch (final InterruptedException e) {}
}
}
}
@ -306,11 +357,15 @@ public class DidYouMean {
@Override
public void run() {
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
} catch (final InterruptedException e) {}
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
try {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
} catch (final InterruptedException e) {}
}
}
}
@ -324,13 +379,17 @@ public class DidYouMean {
@Override
public void run() {
for (int i = 0; i <= DidYouMean.this.wordLen; i++) try {
for (final char c: alphabet) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
}
} catch (final InterruptedException e) {}
for (int i = 0; i <= DidYouMean.this.wordLen; i++) {
try {
for (final char c: DidYouMean.this.alphabet) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
} catch (final InterruptedException e) {}
}
}
}
@ -343,11 +402,15 @@ public class DidYouMean {
@Override
public void run() {
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) try {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
} catch (final InterruptedException e) {}
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) {
try {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
} catch (final InterruptedException e) {}
}
}
}
@ -364,8 +427,12 @@ public class DidYouMean {
StringBuilder s;
try {
while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) {
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) DidYouMean.this.resultSet.add(s);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) {
DidYouMean.this.resultSet.add(s);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
} catch (final InterruptedException e) {}
}
@ -377,10 +444,13 @@ public class DidYouMean {
*/
private class indexSizeComparator implements Comparator<StringBuilder> {
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
final int i1 = DidYouMean.this.index.count(Word.word2hash(o1));
final int i2 = DidYouMean.this.index.count(Word.word2hash(o2));
if (i1 == i2) return WORD_LENGTH_COMPARATOR.compare(o1, o2);
if (i1 == i2) {
return WORD_LENGTH_COMPARATOR.compare(o1, o2);
}
return (i1 < i2) ? 1 : -1; // '<' is correct, because the largest count shall be ordered to be the first position in the result
}
}
@ -391,10 +461,13 @@ public class DidYouMean {
*/
private static class wordLengthComparator implements Comparator<StringBuilder> {
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
final int i1 = o1.length();
final int i2 = o2.length();
if (i1 == i2) return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
if (i1 == i2) {
return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
}
return (i1 < i2) ? 1 : -1; // '<' is correct, because the longest word shall be first
}
@ -411,10 +484,13 @@ public class DidYouMean {
this.secondaryComparator = secondaryComparator;
}
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
final boolean o1m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o1, this.head);
final boolean o2m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o2, this.head);
if ((o1m && o2m) || (!o1m && !o2m)) return this.secondaryComparator.compare(o1, o2);
if ((o1m && o2m) || (!o1m && !o2m)) {
return this.secondaryComparator.compare(o1, o2);
}
return o1m ? -1 : 1;
}
}