diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list
index bb33c44ac..d75e0d98f 100644
--- a/defaults/solr.keys.list
+++ b/defaults/solr.keys.list
@@ -75,15 +75,21 @@ wordcount_i
## internal links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow, textgen
attr_inboundlinks
-## number of inbound links, int
+## total number of inbound links, int
inboundlinkscount_i
+## number of inbound links with noindex tag, int
+inboundlinksnoindexcount_i
+
## external links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow, textgen
attr_outboundlinks
-## number of external links, int
+## total number of external links, int
outboundlinkscount_i
+## number of external links with noindex tag, int
+outboundlinksnoindexcount_i
+
## h1 header, textgen
attr_h1
diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java
index 627093db0..a0b59d143 100644
--- a/htroot/IndexFederated_p.java
+++ b/htroot/IndexFederated_p.java
@@ -61,11 +61,13 @@ public class IndexFederated_p {
sb.solrConnector = null;
}
+ final String schemename = sb.getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list");
+ final SolrScheme scheme = new SolrScheme(new File(env.getDataPath(), "DATA/SETTINGS/" + schemename));
+
if (!solrWasOn && solrIsOnAfterwards) {
// switch on
final String solrurls = sb.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
final boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
- final SolrScheme scheme = new SolrScheme(new File(env.getDataPath(), "DATA/SETTINGS/solr.keys.default.list"));
try {
sb.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, scheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
} catch (final IOException e) {
@@ -75,7 +77,6 @@ public class IndexFederated_p {
}
// read index scheme table flags
- final SolrScheme scheme = sb.solrConnector.getScheme();
final Iterator i = scheme.allIterator();
ConfigurationSet.Entry entry;
while (i.hasNext()) {
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index ebd9aeada..3d21f00a0 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -562,7 +562,8 @@ public final class Switchboard extends serverSwitch {
// prepare a solr index profile switch list
final File solrBackupProfile = new File("defaults/solr.keys.list");
- final File solrWorkProfile = new File(getDataPath(), "DATA/SETTINGS/solr.keys.default.list");
+ final String schemename = getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list");
+ final File solrWorkProfile = new File(getDataPath(), "DATA/SETTINGS/" + schemename);
if (!solrWorkProfile.exists()) FileUtils.copy(solrBackupProfile, solrWorkProfile);
final SolrScheme backupScheme = new SolrScheme(solrBackupProfile);
final SolrScheme workingScheme = new SolrScheme(solrWorkProfile);
diff --git a/source/net/yacy/cora/services/federated/solr/SolrScheme.java b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
index 7d6bd5513..868d0a9d1 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
@@ -111,14 +111,14 @@ public class SolrScheme extends ConfigurationSet {
addSolr(solrdoc, "keywords", yacydoc.dc_subject(' '));
final String content = UTF8.String(yacydoc.getTextBytes());
addSolr(solrdoc, "text_t", content);
- if (contains("wordcount_i")) {
+ if (isEmpty() || contains("wordcount_i")) {
final int contentwc = content.split(" ").length;
addSolr(solrdoc, "wordcount_i", contentwc);
}
// path elements of link
final String path = digestURI.getPath();
- if (path != null && contains("attr_paths")) {
+ if (path != null && (isEmpty() || contains("attr_paths"))) {
final String[] paths = path.split("/");
if (paths.length > 0) addSolr(solrdoc, "attr_paths", paths);
}
@@ -126,8 +126,9 @@ public class SolrScheme extends ConfigurationSet {
// list all links
final Map alllinks = yacydoc.getAnchors();
int c = 0;
- addSolr(solrdoc, "inboundlinkscount_i", yacydoc.inboundLinkCount());
- if (contains("attr_inboundlinks")) {
+ if (isEmpty() || contains("inboundlinkscount_i")) addSolr(solrdoc, "inboundlinkscount_i", yacydoc.inboundLinkCount());
+ if (isEmpty() || contains("inboundlinksnoindexcount_i")) addSolr(solrdoc, "inboundlinksnoindexcount_i", yacydoc.inboundLinkNoindexCount());
+ if (isEmpty() || contains("attr_inboundlinks")) {
final String[] inboundlinks = new String[yacydoc.inboundLinkCount()];
for (final MultiProtocolURI url: yacydoc.inboundLinks()) {
final Properties p = alllinks.get(url);
@@ -135,23 +136,24 @@ public class SolrScheme extends ConfigurationSet {
final String rel = p.getProperty("rel", "");
inboundlinks[c++] =
" 0 ? " rel=\"" + rel + "\"" : "") +
">" +
((name.length() > 0) ? name : "") + "";
}
addSolr(solrdoc, "attr_inboundlinks", inboundlinks);
}
c = 0;
- final String[] outboundlinks = new String[yacydoc.outboundLinkCount()];
- if (contains("attr_outboundlinks")) {
- addSolr(solrdoc, "outboundlinkscount_i", outboundlinks.length);
+ if (isEmpty() || contains("outboundlinkscount_i")) addSolr(solrdoc, "outboundlinkscount_i", yacydoc.outboundLinkCount());
+ if (isEmpty() || contains("outboundlinksnoindexcount_i")) addSolr(solrdoc, "outboundlinksnoindexcount_i", yacydoc.outboundLinkNoindexCount());
+ if (isEmpty() || contains("attr_outboundlinks")) {
+ final String[] outboundlinks = new String[yacydoc.outboundLinkCount()];
for (final MultiProtocolURI url: yacydoc.outboundLinks()) {
final Properties p = alllinks.get(url);
final String name = p.getProperty("name", "");
final String rel = p.getProperty("rel", "");
outboundlinks[c++] =
" 0 ? " rel=\"" + rel + "\"" : "") +
">" +
((name.length() > 0) ? name : "") + "";
}
@@ -196,7 +198,7 @@ public class SolrScheme extends ConfigurationSet {
addSolr(solrdoc, "boldcount_i", bold.length);
if (bold.length > 0) {
addSolr(solrdoc, "attr_bold", bold);
- if (contains("attr_boldcount")) {
+ if (isEmpty() || contains("attr_boldcount")) {
addSolr(solrdoc, "attr_boldcount", html.getBoldCount(bold));
}
}
@@ -204,7 +206,7 @@ public class SolrScheme extends ConfigurationSet {
addSolr(solrdoc, "italiccount_i", italic.length);
if (italic.length > 0) {
addSolr(solrdoc, "attr_italic", italic);
- if (contains("attr_italiccount")) {
+ if (isEmpty() || contains("attr_italiccount")) {
addSolr(solrdoc, "attr_italiccount", html.getItalicCount(italic));
}
}
@@ -213,7 +215,7 @@ public class SolrScheme extends ConfigurationSet {
if (li.length > 0) addSolr(solrdoc, "attr_li", li);
// images
- if (contains("attr_images")) {
+ if (isEmpty() || contains("attr_images")) {
final Collection imagesc = html.getImages().values();
final String[] images = new String[imagesc.size()];
c = 0;
@@ -223,7 +225,7 @@ public class SolrScheme extends ConfigurationSet {
}
// style sheets
- if (contains("attr_css")) {
+ if (isEmpty() || contains("attr_css")) {
final Map csss = html.getCSS();
final String[] css = new String[csss.size()];
c = 0;
@@ -237,7 +239,7 @@ public class SolrScheme extends ConfigurationSet {
}
// Scripts
- if (contains("attr_scripts")) {
+ if (isEmpty() || contains("attr_scripts")) {
final Set scriptss = html.getScript();
final String[] scripts = new String[scriptss.size()];
c = 0;
@@ -249,7 +251,7 @@ public class SolrScheme extends ConfigurationSet {
}
// Frames
- if (contains("attr_frames")) {
+ if (isEmpty() || contains("attr_frames")) {
final Set framess = html.getFrames();
final String[] frames = new String[framess.size()];
c = 0;
@@ -261,7 +263,7 @@ public class SolrScheme extends ConfigurationSet {
}
// IFrames
- if (contains("attr_iframes")) {
+ if (isEmpty() || contains("attr_iframes")) {
final Set iframess = html.getIFrames();
final String[] iframes = new String[iframess.size()];
c = 0;
@@ -277,7 +279,7 @@ public class SolrScheme extends ConfigurationSet {
// generic evaluation pattern
for (final String model: html.getEvaluationModelNames()) {
- if (contains("attr_" + model)) {
+ if (isEmpty() || contains("attr_" + model)) {
final String[] scorenames = html.getEvaluationModelScoreNames(model);
if (scorenames.length > 0) {
addSolr(solrdoc, "attr_" + model, scorenames);
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index 140185fe7..d334c5a12 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -403,13 +403,15 @@ dc_rights
for (final Map.Entry entry: this.anchors.entrySet()) {
url = entry.getKey();
if (url == null) continue;
+ final boolean noindex = entry.getValue().getProperty("rel", "").toLowerCase().indexOf("noindex") >= 0;
+ final boolean nofollow = entry.getValue().getProperty("rel", "").toLowerCase().indexOf("nofollow") >= 0;
if ((thishost == null && url.getHost() == null) ||
((thishost != null && url.getHost() != null) &&
(url.getHost().endsWith(thishost) ||
(thishost.startsWith("www.") && url.getHost().endsWith(thishost.substring(4)))))) {
- this.inboundlinks.put(url, "anchor");
+ this.inboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
} else {
- this.outboundlinks.put(url, "anchor");
+ this.outboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
}
u = url.toNormalform(true, false);
final String name = entry.getValue().getProperty("name", "");
@@ -605,6 +607,26 @@ dc_rights
return (this.outboundlinks == null) ? 0 : this.outboundlinks.size();
}
+ public int inboundLinkNoindexCount() { // number of inbound links whose stored tag value contains "noindex"
+     if (this.inboundlinks == null) resortLinks(); // map not built yet; resortLinks() is expected to fill it
+     int noindexLinks = 0;
+     if (this.inboundlinks == null) return noindexLinks; // still no map after sorting: report zero
+     for (final String linkTag: this.inboundlinks.values()) {
+         noindexLinks += (linkTag.indexOf("noindex") >= 0) ? 1 : 0;
+     }
+     return noindexLinks;
+ }
+
+ public int outboundLinkNoindexCount() { // number of outbound links whose stored tag value contains "noindex"
+     if (this.outboundlinks == null) resortLinks(); // map not built yet; resortLinks() is expected to fill it
+     int noindexLinks = 0;
+     if (this.outboundlinks == null) return noindexLinks; // still no map after sorting: report zero
+     for (final String linkTag: this.outboundlinks.values()) {
+         noindexLinks += (linkTag.indexOf("noindex") >= 0) ? 1 : 0;
+     }
+     return noindexLinks;
+ }
+
public Set inboundLinks() {
if (this.inboundlinks == null) resortLinks();
return (this.inboundlinks == null) ? null : this.inboundlinks.keySet();