From c560a582acfbca16145ccb62d7db5f4125058028 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen
Date: Thu, 26 Jan 2012 16:44:30 +0100
Subject: [PATCH] fix for single-word vocabulary lines

---
Review notes (ignored by git am, not part of the commit message):
 * normalizeKey(): the last substitution replaced " " with " ", which changes
   nothing. It now collapses every run of two or more spaces into one space.
 * normalizeKey() gained a Javadoc block; the second hunk header and the
   diffstat below were updated to match the added lines.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. If context lines do not match the target file, apply
   with "git apply --ignore-whitespace". The index line still names the blobs
   of the original commit.

 source/net/yacy/document/Autotagging.java | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java
index a3036e72f..4818d06e2 100644
--- a/source/net/yacy/document/Autotagging.java
+++ b/source/net/yacy/document/Autotagging.java
@@ -178,16 +178,13 @@ public class Autotagging {
                         p = line.indexOf('\t');
                     }
                     if (p < 0) {
-                        this.tag2print.put(line, line);
-                        this.print2tag.put(line, line);
+                        k = normalizeKey(line);
+                        v = normalizeWord(line);
+                        this.tag2print.put(v, k);
+                        this.print2tag.put(k, v);
                         continue vocloop;
                     }
-                    k = line.substring(0, p).trim();
-                    k = k.replaceAll(" \\+", ", "); // remove symbols that are bad in a query attribute
-                    k = k.replaceAll(" /", ", ");
-                    k = k.replaceAll("\\+", ",");
-                    k = k.replaceAll("/", ",");
-                    k = k.replaceAll(" ", " ");
+                    k = normalizeKey(line.substring(0, p));
                     v = line.substring(p + 1);
                     tags = v.split(",");
                     tagloop: for (String t: tags) {
@@ -204,6 +201,25 @@ public class Autotagging {
             }
         }
 
+        /**
+         * Normalizes a vocabulary term into the form stored as its print name.
+         * Surrounding whitespace is trimmed, every '+' and '/' (with or without a
+         * leading space) is rewritten to a comma, and runs of spaces are collapsed
+         * into a single space.
+         *
+         * @param k the raw term taken from a vocabulary line
+         * @return the normalized term
+         */
+        private final String normalizeKey(String k) {
+            k = k.trim();
+            k = k.replaceAll(" \\+", ", "); // remove symbols that are bad in a query attribute
+            k = k.replaceAll(" /", ", ");
+            k = k.replaceAll("\\+", ",");
+            k = k.replaceAll("/", ",");
+            k = k.replaceAll(" {2,}", " "); // collapse runs of spaces; replacing " " with " " did nothing
+            return k;
+        }
+
         public Vocabulary(String name, Localization localization) {
             this(name);
             Set locNames = localization.locationNames();