mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added option to enrich vocabularies with synonyms from synonym database
This commit is contained in:
parent
6a2a669db4
commit
0dc6e0a5f2
|
@ -134,6 +134,8 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
|
|||
<dd><input type="number" id="discovercolumnobjectlink" name="discovercolumnobjectlink" min="-1" max="99" step="1" size="2" value="-1" disabled="disabled" style="width:50px;"> (first has index 0, if unused set -1)</dd>
|
||||
<dt><i>Charset of Import File</i></dt>
|
||||
<dd><select name="charset">#{charset}#<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>#{/charset}#</select></dd>
|
||||
<dt><i>Auto-Enrich with Synonyms from Stemming Library</i></dt>
|
||||
<dd><input type="checkbox" name="discoverenrichsynonyms" id="discoverenrichsynonyms" checked="checked" /></dd>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt></dt><dd><input type="submit" name="create" value="Create" /></dd>
|
||||
|
|
|
@ -29,9 +29,11 @@ import java.util.Collection;
|
|||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||
import net.yacy.cora.language.synonyms.SynonymLibrary;
|
||||
import net.yacy.cora.lod.vocabulary.DCTerms;
|
||||
import net.yacy.cora.lod.vocabulary.Tagging;
|
||||
import net.yacy.cora.lod.vocabulary.Tagging.SOTuple;
|
||||
|
@ -80,6 +82,7 @@ public class Vocabulary_p {
|
|||
final int discovercolumnliteral = post.getInt("discovercolumnliteral", 0);
|
||||
final int discovercolumnobjectlink = post.getInt("discovercolumnobjectlink", -1);
|
||||
final File discoverFromCSVFile = discoverFromCSVPath.length() > 0 ? new File(discoverFromCSVPath) : null;
|
||||
final boolean discoverenrichsynonyms = post.getBoolean("discoverenrichsynonyms");
|
||||
Segment segment = sb.index;
|
||||
String t;
|
||||
if (!discoverNot) {
|
||||
|
@ -88,10 +91,17 @@ public class Vocabulary_p {
|
|||
String line = null;
|
||||
while ((line = r.readLine()) != null) {
|
||||
String[] l = line.split(";");
|
||||
String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral];
|
||||
String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink];
|
||||
String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral].trim();
|
||||
String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink].trim();
|
||||
if (literal != null && literal.length() > 0) {
|
||||
table.put(literal, new Tagging.SOTuple(Tagging.normalizeTerm(literal), objectlink == null ? "" : objectlink));
|
||||
String synonyms = Tagging.normalizeTerm(literal);
|
||||
if (discoverenrichsynonyms) {
|
||||
Set<String> sy = SynonymLibrary.getSynonyms(literal);
|
||||
if (sy != null) {
|
||||
for (String s: sy) synonyms += "," + s;
|
||||
}
|
||||
}
|
||||
table.put(literal, new Tagging.SOTuple(synonyms, objectlink == null ? "" : objectlink));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -94,7 +94,8 @@ public class SynonymLibrary {
|
|||
* @return a list of synonyms but without the requested word
|
||||
*/
|
||||
public static Set<String> getSynonyms(String word) {
|
||||
word = word.toLowerCase();
|
||||
if (word == null) return null;
|
||||
word = word.toLowerCase().trim();
|
||||
if (word.length() < 2) return null;
|
||||
String key = word.substring(0, 2);
|
||||
List<Set<String>> symsetlist = lib.get(key);
|
||||
|
|
Loading…
Reference in New Issue
Block a user