fix min/max calculation of WordReferenceVars.distance()

The issue was the calculation in AbstractReference: its positions.clear() call left only a single position,
so the distance result was always 0 (distance needs at least 2 positions), and it also created concurrency issues.
Also adds a unit test for the changes.
This commit is contained in:
reger 2016-10-17 23:58:28 +02:00
parent da362628fb
commit 8b74a6bf57
3 changed files with 128 additions and 9 deletions

View File

@ -336,7 +336,24 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (virtualAge() > (v = other.virtualAge())) this.virtualAge = v;
if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v;
if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v;
if (other.positions != null) a(this.positions, min(this.positions, other.positions));
int minpos = min(this.positions, other.positions);
if (minpos != Integer.MAX_VALUE) {
int odist = other.distance();
int dist = this.distance();
this.positions.clear(); // we want only the min
this.positions.add(minpos);
// handle distance for multi word queries
// distance is calculated from positions, must be at least 2 positions for calculation
if (odist > 0 && odist < dist) {
this.positions.add(minpos + odist);
} else if (dist > 0) {
this.positions.add(minpos + dist);
} else if (odist > 0) {
this.positions.add(minpos + odist);
}
}
if (this.posinphrase > (v = other.posinphrase)) this.posinphrase = v;
if (this.posofphrase > (v = other.posofphrase)) this.posofphrase = v;
if (this.lastModified > (w = other.lastModified)) this.lastModified = w;
@ -358,7 +375,22 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (virtualAge() < (v = other.virtualAge())) this.virtualAge = v;
if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v;
if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v;
if (other.positions != null) a(this.positions, max(this.positions, other.positions));
int maxpos = max(this.positions, other.positions);
if (maxpos != Integer.MIN_VALUE) {
int odist = other.distance();
int dist = this.distance();
this.positions.clear();
this.positions.add(maxpos);
// handle distance for multi word queries
// distance is calculated from positions, must be at least 2 positions for calculation
if (odist > dist) {
this.positions.add(maxpos - odist); // special cas for max, to not be altered by the pos for distance use pos before maxpos
} else if (dist > 0) {
this.positions.add(maxpos - dist);
}
}
if (this.posinphrase < (v = other.posinphrase)) this.posinphrase = v;
if (this.posofphrase < (v = other.posofphrase)) this.posofphrase = v;
if (this.lastModified < (w = other.lastModified)) this.lastModified = w;

View File

@ -32,13 +32,6 @@ import java.util.Iterator;
public abstract class AbstractReference implements Reference {
/**
 * Replaces the whole content of the given collection with the single value {@code i}.
 * The values {@code Integer.MAX_VALUE} and {@code Integer.MIN_VALUE} are treated as a
 * signal for 'do nothing'; in that case the collection is left untouched.
 *
 * @param a the collection to overwrite, must not be null (checked by assertion only)
 * @param i the value that becomes the only element, or a sentinel to skip the update
 */
protected static void a(Collection<Integer> a, int i) {
    assert a != null;
    final boolean doNothing = (i == Integer.MAX_VALUE) || (i == Integer.MIN_VALUE);
    if (!doNothing) {
        // drop every previously stored value so that only i remains
        a.clear();
        a.add(i);
    }
}
protected static int max(Collection<Integer> a, Collection<Integer> b) {
if (a == null || a.isEmpty()) return max(b);
if (b == null || b.isEmpty()) return max(a);

View File

@ -0,0 +1,94 @@
/**
* WordReferenceVarsTest
* part of YaCy
* Copyright 2016 by reger24; https://github.com/reger24
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.data.word;
import java.net.MalformedURLException;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.crawler.retrieval.Response;
import net.yacy.kelondro.util.Bitfield;
import org.junit.Test;
import static org.junit.Assert.*;
/**
* Unit tests for WordReferenceVars class.
*/
public class WordReferenceVarsTest {

    /**
     * Test of the min and max methods of class WordReferenceVars.
     * Checks the resulting min/max position and the distance value after
     * merging two references, including repeated and reversed calls.
     *
     * @author reger24
     * @throws MalformedURLException if the test URL cannot be parsed
     */
    @Test
    public void testMin() throws MalformedURLException {
        // testing posintext and distance calculation
        final int minposintext = 5; // minposintext for test
        final int maxposintext = 30; // maxposintext for test
        final int secondposintext = 10; // additional position of the min reference
        // distances the assertions below expect for the min and the max case
        final int expectedMinDistance = secondposintext - minposintext; // = 5
        final int expectedMaxDistance = maxposintext - minposintext;

        final DigestURL url = new DigestURL("http://test.org/test.html");

        // create a WordReference template with posintext = minposintext = 5
        final WordReferenceRow ientry = new WordReferenceRow(
                url.hash(), 20, 3, 2,
                1, 1,
                System.currentTimeMillis(), System.currentTimeMillis(),
                UTF8.getBytes("en"), Response.DT_TEXT,
                0, 0);
        final Word word = new Word(minposintext, 1, 100);
        word.flags = new Bitfield(4);
        ientry.setWord(word);

        final WordReferenceVars wvMin = new WordReferenceVars(ientry, true);
        wvMin.addPosition(secondposintext); // add position for distance testing

        // the max test starts from a copy taken before wvMin is modified by min()
        final WordReferenceVars wvMax = wvMin.clone();

        // create another reference
        final WordReferenceVars wvOther = new WordReferenceVars(ientry, true);
        wvOther.addPosition(maxposintext); // add position (max) for distance testing

        // test min for posintext and distance
        wvMin.min(wvOther);
        assertEquals("min posintext", minposintext, wvMin.minposition());
        assertEquals("min distance", expectedMinDistance, wvMin.distance());

        wvMin.min(wvOther); // test repeated call doesn't change result
        assertEquals("min posintext (repeat)", minposintext, wvMin.minposition());
        assertEquals("min distance (repeat)", expectedMinDistance, wvMin.distance());

        // test max for posintext and distance
        wvMax.max(wvOther);
        assertEquals("max posintext", maxposintext, wvMax.maxposition());
        assertEquals("max distance", expectedMaxDistance, wvMax.distance());

        wvMax.max(wvOther); // test repeated calls don't change result
        wvMax.max(wvOther);
        assertEquals("max posintext (repeat)", maxposintext, wvMax.maxposition());
        assertEquals("max distance (repeat)", expectedMaxDistance, wvMax.distance());

        // reverse test: merge the already merged max reference into the other one
        wvOther.max(wvMax);
        assertEquals("max posintext (reverse)", maxposintext, wvOther.maxposition());
        // message names the reverse case so a failure here is not confused with the repeat case
        assertEquals("max distance (reverse)", expectedMaxDistance, wvOther.distance());
    }
}