This commit is contained in:
Michael Peter Christen 2016-09-14 16:24:55 +02:00
commit 2107674999
5 changed files with 51 additions and 14 deletions

View File

@ -171,8 +171,8 @@ Please clone our code and help with development!
The code is licensed under the GPL v2.
Compiling YaCy:
- you need java 1.7 and ant
- just compile: "ant clean all" - then you can "./startYACY.sh"
- you need Java 1.7 or later and [Apache Ant](http://ant.apache.org/)
- just compile: "ant clean all" - then you can "./startYACY.sh" or "./startYACY.bat"
- create a release tarball: "ant dist"
- create a Mac OS release: "ant distMacApp" (works only on a Mac)
- create a Debian release: "ant deb"

View File

@ -66,7 +66,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
new Column("t", Column.celltype_cardinal, Column.encoder_b256, 2, "posintext"),
new Column("r", Column.celltype_cardinal, Column.encoder_b256, 1, "posinphrase"),
new Column("o", Column.celltype_cardinal, Column.encoder_b256, 1, "posofphrase"),
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 1, "worddistance"),
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 1, "worddistance"), // arbitrary column for avg distance of search query words
new Column("k", Column.celltype_cardinal, Column.encoder_b256, 1, "reserve")
},
Base64Order.enhancedCoder
@ -253,7 +253,10 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
}
/**
* First position of word in text
* First position of word in text.
* positions() is used to remember word positions for each query word of a
* multi-word search query. As we currently don't include a separate posintext()
* function, we use positions() to make the posintext value available.
* @return Collection with one element
*/
@Override

View File

@ -253,9 +253,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.hitcount, // how often appears this word in the text
this.wordsintext, // total number of words
this.phrasesintext, // total number of phrases
// TODO: positon 1 on empty positions may give high ranking scores for unknown pos (needs to be checked if 0 would be appropriate)
this.positions.isEmpty() ? -1 : this.positions.iterator().next(), // position of word in all words
this.positions.isEmpty() ? 0 : this.positions.iterator().next(), // position of word in all words (0 if no position is known)
this.posinphrase, // position of word in its phrase
this.posofphrase, // number of the phrase where word appears
this.lastModified, // last-modified time of the document where word appears

View File

@ -98,17 +98,29 @@ public abstract class AbstractReference implements Reference {
}
return r;
}
/**
* Max position of search query words for multi word queries.
* Delegates to the max(...) helper, applied to the collection returned by positions().
* @return the result of max(positions()); presumably the largest word position
*         (the max helper is defined outside this excerpt - TODO confirm its
*         result for an empty collection)
*/
@Override
public int maxposition() {
return max(positions());
}
/**
* Min word position of search query words for multi word queries.
* Delegates to the min(...) helper, applied to the collection returned by positions().
* @return the result of min(positions()); presumably the smallest word position
*         (the min helper is defined outside this excerpt - TODO confirm its
*         result for an empty collection)
*/
@Override
public int minposition() {
return min(positions());
}
/**
* The average distance (in words) between search query terms for multi word searches.
* @return word distance
*/
@Override
public int distance() {
if (positions().size() < 2) return 0;

View File

@ -51,14 +51,38 @@ public interface Reference {
@Override
public boolean equals(Object other);
/**
* Joins a Reference into this one, setting the values appropriate for ranking.
* @param oe the reference to join into this one
*/
public void join(final Reference oe);
/**
* Positions of search query words for the referenced result url.
* This is only valid for multi word search queries.
* The positions contain the first word position for every search query word
* which has been joined (by join()).
* @return collection of word positions
*/
public Collection<Integer> positions();
/**
* Max position of search query words (for multi word queries).
* @return the maximum word position of the search query words
*         (presumably derived from positions() - confirm against the implementation)
*/
public int maxposition();
/**
* Min word position of search query words (for multi word queries).
* @return the minimum word position of the search query words
*         (presumably derived from positions() - confirm against the implementation)
*/
public int minposition();
/**
* The average distance (in words) between search query terms for multi word searches.
* The distance is calculated from positions().
* @return the average word distance between the search query terms
*/
public int distance();
}