refactoring

This commit is contained in:
Michael Peter Christen 2012-08-10 06:47:13 +02:00
parent a12f693ec9
commit 136fcb1ad9
11 changed files with 199 additions and 198 deletions

View File

@ -39,7 +39,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.OS;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -189,7 +189,7 @@ public class IndexFederated_p {
int c = 0;
boolean dark = false;
// iterate over the YaCySchema enum to keep the defined field order
for(SolrField field : SolrField.values()) {
for(YaCySchema field : YaCySchema.values()) {
prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
prop.put("scheme_" + c + "_checked", sb.index.getSolrScheme().contains(field.name()) ? 1 : 0);
prop.putHTML("scheme_" + c + "_key", field.name());

View File

@ -25,7 +25,7 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.index.SolrConfiguration;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -39,7 +39,7 @@ public class schema_p {
// write scheme
int c = 0;
SolrConfiguration solrScheme = sb.index.getSolrScheme();
for (SolrField field : SolrField.values()) {
for (YaCySchema field : YaCySchema.values()) {
if (solrScheme.contains(field.name())) {
prop.put("fields_" + c + "_solrname", field.getSolrFieldName());
prop.put("fields_" + c + "_type", field.getType().printName());
@ -53,8 +53,8 @@ public class schema_p {
}
prop.put("fields", c);
prop.put("solruniquekey",SolrField.id.getSolrFieldName());
prop.put("solrdefaultsearchfield",SolrField.text_t.getSolrFieldName());
prop.put("solruniquekey",YaCySchema.id.getSolrFieldName());
prop.put("solrdefaultsearchfield",YaCySchema.text_t.getSolrFieldName());
// return rewrite properties
return prop;
}

View File

@ -27,7 +27,7 @@ import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
@ -226,7 +226,7 @@ public class AbstractSolrConnector implements SolrConnector {
public SolrDocument get(final String id) throws IOException {
// construct query
StringBuffer sb = new StringBuffer(id.length() + 5);
sb.append(SolrField.id.getSolrFieldName()).append(':').append('"').append(id).append('"');
sb.append(YaCySchema.id.getSolrFieldName()).append(':').append('"').append(id).append('"');
final SolrQuery query = new SolrQuery();
query.setQuery(sb.toString());
query.setRows(1);

View File

@ -25,7 +25,7 @@
package net.yacy.cora.services.federated.solr;
public interface SolrField {
public interface Schema {
/**
* this shall be implemented as enum, thus shall have the name() method

View File

@ -31,7 +31,7 @@ import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
public class ShardSelection {
@ -59,7 +59,7 @@ public class ShardSelection {
public int select(final SolrDoc solrdoc) throws IOException {
if (this.method == Method.MODULO_HOST_MD5) {
final String sku = (String) solrdoc.getField(SolrField.sku.getSolrFieldName()).getValue();
final String sku = (String) solrdoc.getField(YaCySchema.sku.getSolrFieldName()).getValue();
return selectURL(sku);
}

View File

@ -40,43 +40,43 @@ public class SolrDoc extends SolrInputDocument {
super();
}
public final void addSolr(final SolrField key, final String value) {
public final void addSolr(final Schema key, final String value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final Date value) {
public final void addSolr(final Schema key, final Date value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final int value) {
public final void addSolr(final Schema key, final int value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final long value) {
public final void addSolr(final Schema key, final long value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final String[] value) {
public final void addSolr(final Schema key, final String[] value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final List<String> value) {
public final void addSolr(final Schema key, final List<String> value) {
this.setField(key.getSolrFieldName(), value.toArray(new String[value.size()]));
}
public final void addSolr(final SolrField key, final float value) {
public final void addSolr(final Schema key, final float value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final double value) {
public final void addSolr(final Schema key, final double value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final boolean value) {
public final void addSolr(final Schema key, final boolean value) {
this.setField(key.getSolrFieldName(), value);
}
public final void addSolr(final SolrField key, final String value, final float boost) {
public final void addSolr(final Schema key, final String value, final float boost) {
this.setField(key.getSolrFieldName(), value, boost);
}

View File

@ -39,7 +39,7 @@ import java.util.logging.Logger;
import net.yacy.cora.storage.ConfigurationSet.Entry;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
/**
* This class reads configuration attributes as a list of keywords from a file.
* The file may contain lines with one keyword, comment lines, empty lines and commented-out keyword lines.
@ -165,7 +165,7 @@ public class ConfigurationSet extends TreeMap<String,Entry> implements Serializa
if (modified) {
commit();
try {
SolrField f = SolrField.valueOf(key);
YaCySchema f = YaCySchema.valueOf(key);
f.setSolrFieldName(entry.getValue());
} catch (IllegalArgumentException e) {}
}

View File

@ -86,7 +86,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
Iterator<Entry> it = this.entryIterator();
for (ConfigurationSet.Entry etr = it.next(); it.hasNext(); etr = it.next()) {
try {
SolrField f = SolrField.valueOf(etr.key());
YaCySchema f = YaCySchema.valueOf(etr.key());
f.setSolrFieldName(etr.getValue());
} catch (IllegalArgumentException e) {
Log.logWarning("SolrScheme", "solr scheme file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
@ -94,7 +94,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
}
// check consistency the other way: verify that all enum constants in YaCySchema appear in the configuration file
for (SolrField field: SolrField.values()) {
for (YaCySchema field: YaCySchema.values()) {
if (this.get(field.name()) == null) {
Log.logWarning("SolrScheme", " solr scheme file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
}
@ -102,51 +102,51 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
this.lazy = lazy;
}
private boolean contains(SolrField field) {
private boolean contains(YaCySchema field) {
return this.contains(field.name());
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final byte[] value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final byte[] value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length != 0))) solrdoc.addSolr(key, UTF8.String(value));
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final String value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String value, final float boost) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final String value, final float boost) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) solrdoc.addSolr(key, value, boost);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final Date value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final Date value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.getTime() > 0))) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String[] value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final String[] value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final List<String> value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final List<String> value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final int value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final int value) {
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final long value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final long value) {
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final float value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final float value) {
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0.0f)) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final double value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final double value) {
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0.0d)) solrdoc.addSolr(key, value);
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final boolean value) {
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final boolean value) {
if (isEmpty() || contains(key)) solrdoc.addSolr(key, value);
}
@ -162,7 +162,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
Iterator<Entry> it = this.entryIterator();
for (ConfigurationSet.Entry etr = it.next(); it.hasNext(); etr = it.next()) {
try {
SolrField f = SolrField.valueOf(etr.key());
YaCySchema f = YaCySchema.valueOf(etr.key());
f.setSolrFieldName(etr.getValue());
} catch (IllegalArgumentException e) {
continue;
@ -176,22 +176,22 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final DigestURI digestURI = new DigestURI(md.url());
boolean allAttr = this.isEmpty();
if (allAttr || contains(SolrField.failreason_t)) addSolr(solrdoc, SolrField.failreason_t, "");
addSolr(solrdoc, SolrField.id, ASCII.String(md.hash()));
addSolr(solrdoc, SolrField.sku, digestURI.toNormalform(true, false));
if (allAttr || contains(SolrField.ip_s)) {
if (allAttr || contains(YaCySchema.failreason_t)) addSolr(solrdoc, YaCySchema.failreason_t, "");
addSolr(solrdoc, YaCySchema.id, ASCII.String(md.hash()));
addSolr(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
if (allAttr || contains(YaCySchema.ip_s)) {
final InetAddress address = digestURI.getInetAddress();
if (address != null) addSolr(solrdoc, SolrField.ip_s, address.getHostAddress());
if (address != null) addSolr(solrdoc, YaCySchema.ip_s, address.getHostAddress());
}
if (digestURI.getHost() != null) addSolr(solrdoc, SolrField.host_s, digestURI.getHost());
if (allAttr || contains(SolrField.title)) addSolr(solrdoc, SolrField.title, md.dc_title());
if (allAttr || contains(SolrField.author)) addSolr(solrdoc, SolrField.author, md.dc_creator());
if (allAttr || contains(SolrField.description)) addSolr(solrdoc, SolrField.description, md.snippet());
if (allAttr || contains(SolrField.content_type)) addSolr(solrdoc, SolrField.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype()));
if (allAttr || contains(SolrField.last_modified)) addSolr(solrdoc, SolrField.last_modified, md.moddate());
if (allAttr || contains(SolrField.text_t)) addSolr(solrdoc, SolrField.text_t, ""); // not delivered in metadata
if (allAttr || contains(SolrField.wordcount_i)) addSolr(solrdoc, SolrField.wordcount_i, md.wordCount());
if (allAttr || contains(SolrField.keywords)) {
if (digestURI.getHost() != null) addSolr(solrdoc, YaCySchema.host_s, digestURI.getHost());
if (allAttr || contains(YaCySchema.title)) addSolr(solrdoc, YaCySchema.title, md.dc_title());
if (allAttr || contains(YaCySchema.author)) addSolr(solrdoc, YaCySchema.author, md.dc_creator());
if (allAttr || contains(YaCySchema.description)) addSolr(solrdoc, YaCySchema.description, md.snippet());
if (allAttr || contains(YaCySchema.content_type)) addSolr(solrdoc, YaCySchema.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype()));
if (allAttr || contains(YaCySchema.last_modified)) addSolr(solrdoc, YaCySchema.last_modified, md.moddate());
if (allAttr || contains(YaCySchema.text_t)) addSolr(solrdoc, YaCySchema.text_t, ""); // not delivered in metadata
if (allAttr || contains(YaCySchema.wordcount_i)) addSolr(solrdoc, YaCySchema.wordcount_i, md.wordCount());
if (allAttr || contains(YaCySchema.keywords)) {
String keywords = md.dc_subject();
Bitfield flags = md.flags();
if (flags.get(Condenser.flag_cat_indexof)) {
@ -199,41 +199,41 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (keywords.indexOf(',') > 0) keywords += ", indexof"; else keywords += " indexof";
}
}
addSolr(solrdoc, SolrField.keywords, keywords);
addSolr(solrdoc, YaCySchema.keywords, keywords);
}
// path elements of link
final String path = digestURI.getPath();
if (path != null && (allAttr || contains(SolrField.paths_txt))) {
if (path != null && (allAttr || contains(YaCySchema.paths_txt))) {
final String[] paths = path.split("/");
if (paths.length > 0) addSolr(solrdoc, SolrField.paths_txt, paths);
if (paths.length > 0) addSolr(solrdoc, YaCySchema.paths_txt, paths);
}
if (allAttr || contains(SolrField.imagescount_i)) addSolr(solrdoc, SolrField.imagescount_i, md.limage());
if (allAttr || contains(SolrField.inboundlinkscount_i)) addSolr(solrdoc, SolrField.inboundlinkscount_i, md.llocal());
if (allAttr || contains(SolrField.outboundlinkscount_i)) addSolr(solrdoc, SolrField.outboundlinkscount_i, md.lother());
if (allAttr || contains(SolrField.charset_s)) addSolr(solrdoc, SolrField.charset_s, "UTF8");
if (allAttr || contains(YaCySchema.imagescount_i)) addSolr(solrdoc, YaCySchema.imagescount_i, md.limage());
if (allAttr || contains(YaCySchema.inboundlinkscount_i)) addSolr(solrdoc, YaCySchema.inboundlinkscount_i, md.llocal());
if (allAttr || contains(YaCySchema.outboundlinkscount_i)) addSolr(solrdoc, YaCySchema.outboundlinkscount_i, md.lother());
if (allAttr || contains(YaCySchema.charset_s)) addSolr(solrdoc, YaCySchema.charset_s, "UTF8");
// coordinates
if (md.lat() != 0.0f && md.lon() != 0.0f) {
if (allAttr || contains(SolrField.lon_coordinate)) addSolr(solrdoc, SolrField.lon_coordinate, md.lon());
if (allAttr || contains(SolrField.lat_coordinate)) addSolr(solrdoc, SolrField.lat_coordinate, md.lat());
if (allAttr || contains(YaCySchema.lon_coordinate)) addSolr(solrdoc, YaCySchema.lon_coordinate, md.lon());
if (allAttr || contains(YaCySchema.lat_coordinate)) addSolr(solrdoc, YaCySchema.lat_coordinate, md.lat());
}
if (allAttr || contains(SolrField.httpstatus_i)) addSolr(solrdoc, SolrField.httpstatus_i, 200);
if (allAttr || contains(YaCySchema.httpstatus_i)) addSolr(solrdoc, YaCySchema.httpstatus_i, 200);
// fields that are in URIMetadataRow in addition to the yacy2solr basic requirements
if (allAttr || contains(SolrField.load_date_dt)) addSolr(solrdoc, SolrField.load_date_dt, md.loaddate());
if (allAttr || contains(SolrField.fresh_date_dt)) addSolr(solrdoc, SolrField.fresh_date_dt, md.freshdate());
if (allAttr || contains(SolrField.host_id_s)) addSolr(solrdoc, SolrField.host_id_s, md.hosthash());
if ((allAttr || contains(SolrField.referrer_id_txt)) && md.referrerHash() != null) addSolr(solrdoc, SolrField.referrer_id_txt, new String[]{ASCII.String(md.referrerHash())});
if (allAttr || contains(SolrField.md5_s)) addSolr(solrdoc, SolrField.md5_s, md.md5());
if (allAttr || contains(SolrField.publisher_t)) addSolr(solrdoc, SolrField.publisher_t, md.dc_publisher());
if ((allAttr || contains(SolrField.language_txt)) && md.language() != null) addSolr(solrdoc, SolrField.language_txt,new String[]{UTF8.String(md.language())});
if (allAttr || contains(SolrField.ranking_i)) addSolr(solrdoc, SolrField.ranking_i, md.ranking());
if (allAttr || contains(SolrField.size_i)) addSolr(solrdoc, SolrField.size_i, md.size());
if (allAttr || contains(SolrField.audiolinkscount_i)) addSolr(solrdoc, SolrField.audiolinkscount_i, md.laudio());
if (allAttr || contains(SolrField.videolinkscount_i)) addSolr(solrdoc, SolrField.videolinkscount_i, md.lvideo());
if (allAttr || contains(SolrField.applinkscount_i)) addSolr(solrdoc, SolrField.applinkscount_i, md.lapp());
if (allAttr || contains(YaCySchema.load_date_dt)) addSolr(solrdoc, YaCySchema.load_date_dt, md.loaddate());
if (allAttr || contains(YaCySchema.fresh_date_dt)) addSolr(solrdoc, YaCySchema.fresh_date_dt, md.freshdate());
if (allAttr || contains(YaCySchema.host_id_s)) addSolr(solrdoc, YaCySchema.host_id_s, md.hosthash());
if ((allAttr || contains(YaCySchema.referrer_id_txt)) && md.referrerHash() != null) addSolr(solrdoc, YaCySchema.referrer_id_txt, new String[]{ASCII.String(md.referrerHash())});
if (allAttr || contains(YaCySchema.md5_s)) addSolr(solrdoc, YaCySchema.md5_s, md.md5());
if (allAttr || contains(YaCySchema.publisher_t)) addSolr(solrdoc, YaCySchema.publisher_t, md.dc_publisher());
if ((allAttr || contains(YaCySchema.language_txt)) && md.language() != null) addSolr(solrdoc, YaCySchema.language_txt,new String[]{UTF8.String(md.language())});
if (allAttr || contains(YaCySchema.ranking_i)) addSolr(solrdoc, YaCySchema.ranking_i, md.ranking());
if (allAttr || contains(YaCySchema.size_i)) addSolr(solrdoc, YaCySchema.size_i, md.size());
if (allAttr || contains(YaCySchema.audiolinkscount_i)) addSolr(solrdoc, YaCySchema.audiolinkscount_i, md.laudio());
if (allAttr || contains(YaCySchema.videolinkscount_i)) addSolr(solrdoc, YaCySchema.videolinkscount_i, md.lvideo());
if (allAttr || contains(YaCySchema.applinkscount_i)) addSolr(solrdoc, YaCySchema.applinkscount_i, md.lapp());
return solrdoc;
}
@ -243,32 +243,32 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final SolrDoc solrdoc = new SolrDoc();
final DigestURI digestURI = new DigestURI(yacydoc.dc_source());
boolean allAttr = this.isEmpty();
addSolr(solrdoc, SolrField.id, id);
addSolr(solrdoc, SolrField.sku, digestURI.toNormalform(true, false));
if (allAttr || contains(SolrField.failreason_t)) addSolr(solrdoc, SolrField.failreason_t, ""); // overwrite a possible fail reason (in case that there was a fail reason before)
if (allAttr || contains(SolrField.ip_s)) {
addSolr(solrdoc, YaCySchema.id, id);
addSolr(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
if (allAttr || contains(YaCySchema.failreason_t)) addSolr(solrdoc, YaCySchema.failreason_t, ""); // overwrite a possible fail reason (in case that there was a fail reason before)
if (allAttr || contains(YaCySchema.ip_s)) {
final InetAddress address = digestURI.getInetAddress();
if (address != null) addSolr(solrdoc, SolrField.ip_s, address.getHostAddress());
if (address != null) addSolr(solrdoc, YaCySchema.ip_s, address.getHostAddress());
}
if (digestURI.getHost() != null) addSolr(solrdoc, SolrField.host_s, digestURI.getHost());
if (allAttr || contains(SolrField.title)) addSolr(solrdoc, SolrField.title, yacydoc.dc_title());
if (allAttr || contains(SolrField.author)) addSolr(solrdoc, SolrField.author, yacydoc.dc_creator());
if (allAttr || contains(SolrField.description)) addSolr(solrdoc, SolrField.description, yacydoc.dc_description());
if (allAttr || contains(SolrField.content_type)) addSolr(solrdoc, SolrField.content_type, yacydoc.dc_format());
if (allAttr || contains(SolrField.last_modified)) addSolr(solrdoc, SolrField.last_modified, header == null ? new Date() : header.lastModified());
if (allAttr || contains(SolrField.keywords)) addSolr(solrdoc, SolrField.keywords, yacydoc.dc_subject(' '));
if (digestURI.getHost() != null) addSolr(solrdoc, YaCySchema.host_s, digestURI.getHost());
if (allAttr || contains(YaCySchema.title)) addSolr(solrdoc, YaCySchema.title, yacydoc.dc_title());
if (allAttr || contains(YaCySchema.author)) addSolr(solrdoc, YaCySchema.author, yacydoc.dc_creator());
if (allAttr || contains(YaCySchema.description)) addSolr(solrdoc, YaCySchema.description, yacydoc.dc_description());
if (allAttr || contains(YaCySchema.content_type)) addSolr(solrdoc, YaCySchema.content_type, yacydoc.dc_format());
if (allAttr || contains(YaCySchema.last_modified)) addSolr(solrdoc, YaCySchema.last_modified, header == null ? new Date() : header.lastModified());
if (allAttr || contains(YaCySchema.keywords)) addSolr(solrdoc, YaCySchema.keywords, yacydoc.dc_subject(' '));
final String content = yacydoc.getTextString();
if (allAttr || contains(SolrField.text_t)) addSolr(solrdoc, SolrField.text_t, content);
if (allAttr || contains(SolrField.wordcount_i)) {
if (allAttr || contains(YaCySchema.text_t)) addSolr(solrdoc, YaCySchema.text_t, content);
if (allAttr || contains(YaCySchema.wordcount_i)) {
final int contentwc = content.split(" ").length;
addSolr(solrdoc, SolrField.wordcount_i, contentwc);
addSolr(solrdoc, YaCySchema.wordcount_i, contentwc);
}
// path elements of link
final String path = digestURI.getPath();
if (path != null && (allAttr || contains(SolrField.paths_txt))) {
if (path != null && (allAttr || contains(YaCySchema.paths_txt))) {
final String[] paths = path.split("/");
if (paths.length > 0) addSolr(solrdoc, SolrField.paths_txt, paths);
if (paths.length > 0) addSolr(solrdoc, YaCySchema.paths_txt, paths);
}
// get list of all links; URLs that appear in other fields of the solr scheme will be removed from it
@ -285,14 +285,14 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
int f = 1;
String[] hs;
hs = html.getHeadlines(1); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, SolrField.h1_txt, hs);
hs = html.getHeadlines(2); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, SolrField.h2_txt, hs);
hs = html.getHeadlines(3); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, SolrField.h3_txt, hs);
hs = html.getHeadlines(4); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, SolrField.h4_txt, hs);
hs = html.getHeadlines(5); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, SolrField.h5_txt, hs);
hs = html.getHeadlines(6); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, SolrField.h6_txt, hs);
hs = html.getHeadlines(1); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, YaCySchema.h1_txt, hs);
hs = html.getHeadlines(2); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, YaCySchema.h2_txt, hs);
hs = html.getHeadlines(3); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, YaCySchema.h3_txt, hs);
hs = html.getHeadlines(4); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, YaCySchema.h4_txt, hs);
hs = html.getHeadlines(5); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, YaCySchema.h5_txt, hs);
hs = html.getHeadlines(6); h = h | (hs.length > 0 ? f : 0); f = f * 2; addSolr(solrdoc, YaCySchema.h6_txt, hs);
addSolr(solrdoc, SolrField.htags_i, h);
addSolr(solrdoc, YaCySchema.htags_i, h);
// noindex and nofollow attributes
// from HTML (meta-tag in HTML header: robots)
@ -331,32 +331,32 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (x_robots_tag.indexOf("nofollow",0) >= 0) b += 2048; // set bit 11
if (x_robots_tag.indexOf("unavailable_after",0) >=0) b += 4096; // set bit 12
}
addSolr(solrdoc, SolrField.robots_i, b);
addSolr(solrdoc, YaCySchema.robots_i, b);
// meta tags: generator
final String generator = html.getMetas().get("generator");
if (generator != null) addSolr(solrdoc, SolrField.metagenerator_t, generator);
if (generator != null) addSolr(solrdoc, YaCySchema.metagenerator_t, generator);
// bold, italic
final String[] bold = html.getBold();
addSolr(solrdoc, SolrField.boldcount_i, bold.length);
addSolr(solrdoc, YaCySchema.boldcount_i, bold.length);
if (bold.length > 0) {
addSolr(solrdoc, SolrField.bold_txt, bold);
if (allAttr || contains(SolrField.bold_val)) {
addSolr(solrdoc, SolrField.bold_val, html.getBoldCount(bold));
addSolr(solrdoc, YaCySchema.bold_txt, bold);
if (allAttr || contains(YaCySchema.bold_val)) {
addSolr(solrdoc, YaCySchema.bold_val, html.getBoldCount(bold));
}
}
final String[] italic = html.getItalic();
addSolr(solrdoc, SolrField.italiccount_i, italic.length);
addSolr(solrdoc, YaCySchema.italiccount_i, italic.length);
if (italic.length > 0) {
addSolr(solrdoc, SolrField.italic_txt, italic);
if (allAttr || contains(SolrField.italic_val)) {
addSolr(solrdoc, SolrField.italic_val, html.getItalicCount(italic));
addSolr(solrdoc, YaCySchema.italic_txt, italic);
if (allAttr || contains(YaCySchema.italic_val)) {
addSolr(solrdoc, YaCySchema.italic_val, html.getItalicCount(italic));
}
}
final String[] li = html.getLi();
addSolr(solrdoc, SolrField.licount_i, li.length);
if (li.length > 0) addSolr(solrdoc, SolrField.li_txt, li);
addSolr(solrdoc, YaCySchema.licount_i, li.length);
if (li.length > 0) addSolr(solrdoc, YaCySchema.li_txt, li);
// images
final Collection<ImageEntry> imagesc = html.getImages().values();
@ -374,14 +374,14 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
imgstubs.add(uri.toString().substring(protocol.length() + 3));
imgalts.add(ie.alt());
}
if (allAttr || contains(SolrField.imagescount_i)) addSolr(solrdoc, SolrField.imagescount_i, imgtags.size());
if (allAttr || contains(SolrField.images_tag_txt)) addSolr(solrdoc, SolrField.images_tag_txt, imgtags);
if (allAttr || contains(SolrField.images_protocol_txt)) addSolr(solrdoc, SolrField.images_protocol_txt, protocolList2indexedList(imgprots));
if (allAttr || contains(SolrField.images_urlstub_txt)) addSolr(solrdoc, SolrField.images_urlstub_txt, imgstubs);
if (allAttr || contains(SolrField.images_alt_txt)) addSolr(solrdoc, SolrField.images_alt_txt, imgalts);
if (allAttr || contains(YaCySchema.imagescount_i)) addSolr(solrdoc, YaCySchema.imagescount_i, imgtags.size());
if (allAttr || contains(YaCySchema.images_tag_txt)) addSolr(solrdoc, YaCySchema.images_tag_txt, imgtags);
if (allAttr || contains(YaCySchema.images_protocol_txt)) addSolr(solrdoc, YaCySchema.images_protocol_txt, protocolList2indexedList(imgprots));
if (allAttr || contains(YaCySchema.images_urlstub_txt)) addSolr(solrdoc, YaCySchema.images_urlstub_txt, imgstubs);
if (allAttr || contains(YaCySchema.images_alt_txt)) addSolr(solrdoc, YaCySchema.images_alt_txt, imgalts);
// style sheets
if (allAttr || contains(SolrField.css_tag_txt)) {
if (allAttr || contains(YaCySchema.css_tag_txt)) {
final Map<MultiProtocolURI, String> csss = html.getCSS();
final String[] css_tag = new String[csss.size()];
final String[] css_url = new String[csss.size()];
@ -396,13 +396,13 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
css_url[c] = url;
c++;
}
addSolr(solrdoc, SolrField.csscount_i, css_tag.length);
if (css_tag.length > 0) addSolr(solrdoc, SolrField.css_tag_txt, css_tag);
if (css_url.length > 0) addSolr(solrdoc, SolrField.css_url_txt, css_url);
addSolr(solrdoc, YaCySchema.csscount_i, css_tag.length);
if (css_tag.length > 0) addSolr(solrdoc, YaCySchema.css_tag_txt, css_tag);
if (css_url.length > 0) addSolr(solrdoc, YaCySchema.css_url_txt, css_url);
}
// Scripts
if (allAttr || contains(SolrField.scripts_txt)) {
if (allAttr || contains(YaCySchema.scripts_txt)) {
final Set<MultiProtocolURI> scriptss = html.getScript();
final String[] scripts = new String[scriptss.size()];
c = 0;
@ -411,12 +411,12 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
ouboundLinks.remove(url);
scripts[c++] = url.toNormalform(false, false);
}
addSolr(solrdoc, SolrField.scriptscount_i, scripts.length);
if (scripts.length > 0) addSolr(solrdoc, SolrField.scripts_txt, scripts);
addSolr(solrdoc, YaCySchema.scriptscount_i, scripts.length);
if (scripts.length > 0) addSolr(solrdoc, YaCySchema.scripts_txt, scripts);
}
// Frames
if (allAttr || contains(SolrField.frames_txt)) {
if (allAttr || contains(YaCySchema.frames_txt)) {
final Set<MultiProtocolURI> framess = html.getFrames();
final String[] frames = new String[framess.size()];
c = 0;
@ -425,12 +425,12 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
ouboundLinks.remove(url);
frames[c++] = url.toNormalform(false, false);
}
addSolr(solrdoc, SolrField.framesscount_i, frames.length);
if (frames.length > 0) addSolr(solrdoc, SolrField.frames_txt, frames);
addSolr(solrdoc, YaCySchema.framesscount_i, frames.length);
if (frames.length > 0) addSolr(solrdoc, YaCySchema.frames_txt, frames);
}
// IFrames
if (allAttr || contains(SolrField.iframes_txt)) {
if (allAttr || contains(YaCySchema.iframes_txt)) {
final Set<MultiProtocolURI> iframess = html.getIFrames();
final String[] iframes = new String[iframess.size()];
c = 0;
@ -439,22 +439,22 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
ouboundLinks.remove(url);
iframes[c++] = url.toNormalform(false, false);
}
addSolr(solrdoc, SolrField.iframesscount_i, iframes.length);
if (iframes.length > 0) addSolr(solrdoc, SolrField.iframes_txt, iframes);
addSolr(solrdoc, YaCySchema.iframesscount_i, iframes.length);
if (iframes.length > 0) addSolr(solrdoc, YaCySchema.iframes_txt, iframes);
}
// canonical tag
if (allAttr || contains(SolrField.canonical_s)) {
if (allAttr || contains(YaCySchema.canonical_s)) {
final MultiProtocolURI canonical = html.getCanonical();
if (canonical != null) {
inboundLinks.remove(canonical);
ouboundLinks.remove(canonical);
addSolr(solrdoc, SolrField.canonical_s, canonical.toNormalform(false, false));
addSolr(solrdoc, YaCySchema.canonical_s, canonical.toNormalform(false, false));
}
}
// meta refresh tag
if (allAttr || contains(SolrField.refresh_s)) {
if (allAttr || contains(YaCySchema.refresh_s)) {
String refresh = html.getRefreshPath();
if (refresh != null && refresh.length() > 0) {
MultiProtocolURI refreshURL;
@ -463,23 +463,23 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (refreshURL != null) {
inboundLinks.remove(refreshURL);
ouboundLinks.remove(refreshURL);
addSolr(solrdoc, SolrField.refresh_s, refreshURL.toNormalform(false, false));
addSolr(solrdoc, YaCySchema.refresh_s, refreshURL.toNormalform(false, false));
}
} catch (MalformedURLException e) {
addSolr(solrdoc, SolrField.refresh_s, refresh);
addSolr(solrdoc, YaCySchema.refresh_s, refresh);
}
}
}
// flash embedded
if (allAttr || contains(SolrField.flash_b)) {
if (allAttr || contains(YaCySchema.flash_b)) {
MultiProtocolURI[] flashURLs = html.getFlash();
for (MultiProtocolURI u: flashURLs) {
// remove all flash links from inbound/outbound links
inboundLinks.remove(u);
ouboundLinks.remove(u);
}
addSolr(solrdoc, SolrField.flash_b, flashURLs.length > 0);
addSolr(solrdoc, YaCySchema.flash_b, flashURLs.length > 0);
}
// generic evaluation pattern
@ -487,21 +487,21 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (allAttr || contains("ext_" + model + "_txt")) {
final String[] scorenames = html.getEvaluationModelScoreNames(model);
if (scorenames.length > 0) {
addSolr(solrdoc, SolrField.valueOf("ext_" + model + "_txt"), scorenames);
addSolr(solrdoc, SolrField.valueOf("ext_" + model + "_val"), html.getEvaluationModelScoreCounts(model, scorenames));
addSolr(solrdoc, YaCySchema.valueOf("ext_" + model + "_txt"), scorenames);
addSolr(solrdoc, YaCySchema.valueOf("ext_" + model + "_val"), html.getEvaluationModelScoreCounts(model, scorenames));
}
}
}
// response time
addSolr(solrdoc, SolrField.responsetime_i, header == null ? 0 : Integer.parseInt(header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0")));
addSolr(solrdoc, YaCySchema.responsetime_i, header == null ? 0 : Integer.parseInt(header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0")));
}
// list all links
final Map<MultiProtocolURI, Properties> alllinks = yacydoc.getAnchors();
c = 0;
if (allAttr || contains(SolrField.inboundlinkscount_i)) addSolr(solrdoc, SolrField.inboundlinkscount_i, inboundLinks.size());
if (allAttr || contains(SolrField.inboundlinksnofollowcount_i)) addSolr(solrdoc, SolrField.inboundlinksnofollowcount_i, yacydoc.inboundLinkNofollowCount());
if (allAttr || contains(YaCySchema.inboundlinkscount_i)) addSolr(solrdoc, YaCySchema.inboundlinkscount_i, inboundLinks.size());
if (allAttr || contains(YaCySchema.inboundlinksnofollowcount_i)) addSolr(solrdoc, YaCySchema.inboundlinksnofollowcount_i, yacydoc.inboundLinkNofollowCount());
final List<String> inboundlinksTag = new ArrayList<String>(inboundLinks.size());
final List<String> inboundlinksURLProtocol = new ArrayList<String>(inboundLinks.size());
final List<String> inboundlinksURLStub = new ArrayList<String>(inboundLinks.size());
@ -529,17 +529,17 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
((text.length() > 0) ? text : "") + "</a>");
c++;
}
if (allAttr || contains(SolrField.inboundlinks_tag_txt)) addSolr(solrdoc, SolrField.inboundlinks_tag_txt, inboundlinksTag);
if (allAttr || contains(SolrField.inboundlinks_protocol_txt)) addSolr(solrdoc, SolrField.inboundlinks_protocol_txt, protocolList2indexedList(inboundlinksURLProtocol));
if (allAttr || contains(SolrField.inboundlinks_urlstub_txt)) addSolr(solrdoc, SolrField.inboundlinks_urlstub_txt, inboundlinksURLStub);
if (allAttr || contains(SolrField.inboundlinks_name_txt)) addSolr(solrdoc, SolrField.inboundlinks_name_txt, inboundlinksName);
if (allAttr || contains(SolrField.inboundlinks_rel_txt)) addSolr(solrdoc, SolrField.inboundlinks_rel_txt, inboundlinksRel);
if (allAttr || contains(SolrField.inboundlinks_relflags_txt)) addSolr(solrdoc, SolrField.inboundlinks_relflags_txt, relEval(inboundlinksRel));
if (allAttr || contains(SolrField.inboundlinks_text_txt)) addSolr(solrdoc, SolrField.inboundlinks_text_txt, inboundlinksText);
if (allAttr || contains(YaCySchema.inboundlinks_tag_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_tag_txt, inboundlinksTag);
if (allAttr || contains(YaCySchema.inboundlinks_protocol_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_protocol_txt, protocolList2indexedList(inboundlinksURLProtocol));
if (allAttr || contains(YaCySchema.inboundlinks_urlstub_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_urlstub_txt, inboundlinksURLStub);
if (allAttr || contains(YaCySchema.inboundlinks_name_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_name_txt, inboundlinksName);
if (allAttr || contains(YaCySchema.inboundlinks_rel_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_rel_txt, inboundlinksRel);
if (allAttr || contains(YaCySchema.inboundlinks_relflags_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_relflags_txt, relEval(inboundlinksRel));
if (allAttr || contains(YaCySchema.inboundlinks_text_txt)) addSolr(solrdoc, YaCySchema.inboundlinks_text_txt, inboundlinksText);
c = 0;
if (allAttr || contains(SolrField.outboundlinkscount_i)) addSolr(solrdoc, SolrField.outboundlinkscount_i, ouboundLinks.size());
if (allAttr || contains(SolrField.outboundlinksnofollowcount_i)) addSolr(solrdoc, SolrField.outboundlinksnofollowcount_i, yacydoc.outboundLinkNofollowCount());
if (allAttr || contains(YaCySchema.outboundlinkscount_i)) addSolr(solrdoc, YaCySchema.outboundlinkscount_i, ouboundLinks.size());
if (allAttr || contains(YaCySchema.outboundlinksnofollowcount_i)) addSolr(solrdoc, YaCySchema.outboundlinksnofollowcount_i, yacydoc.outboundLinkNofollowCount());
final List<String> outboundlinksTag = new ArrayList<String>(ouboundLinks.size());
final List<String> outboundlinksURLProtocol = new ArrayList<String>(ouboundLinks.size());
final List<String> outboundlinksURLStub = new ArrayList<String>(ouboundLinks.size());
@ -567,37 +567,37 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
((text.length() > 0) ? text : "") + "</a>");
c++;
}
if (allAttr || contains(SolrField.outboundlinks_tag_txt)) addSolr(solrdoc, SolrField.outboundlinks_tag_txt, outboundlinksTag);
if (allAttr || contains(SolrField.outboundlinks_protocol_txt)) addSolr(solrdoc, SolrField.outboundlinks_protocol_txt, protocolList2indexedList(outboundlinksURLProtocol));
if (allAttr || contains(SolrField.outboundlinks_urlstub_txt)) addSolr(solrdoc, SolrField.outboundlinks_urlstub_txt, outboundlinksURLStub);
if (allAttr || contains(SolrField.outboundlinks_name_txt)) addSolr(solrdoc, SolrField.outboundlinks_name_txt, outboundlinksName);
if (allAttr || contains(SolrField.outboundlinks_rel_txt)) addSolr(solrdoc, SolrField.outboundlinks_rel_txt, outboundlinksRel);
if (allAttr || contains(SolrField.outboundlinks_relflags_txt)) addSolr(solrdoc, SolrField.outboundlinks_relflags_txt, relEval(inboundlinksRel));
if (allAttr || contains(SolrField.outboundlinks_text_txt)) addSolr(solrdoc, SolrField.outboundlinks_text_txt, outboundlinksText);
if (allAttr || contains(YaCySchema.outboundlinks_tag_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_tag_txt, outboundlinksTag);
if (allAttr || contains(YaCySchema.outboundlinks_protocol_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_protocol_txt, protocolList2indexedList(outboundlinksURLProtocol));
if (allAttr || contains(YaCySchema.outboundlinks_urlstub_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_urlstub_txt, outboundlinksURLStub);
if (allAttr || contains(YaCySchema.outboundlinks_name_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_name_txt, outboundlinksName);
if (allAttr || contains(YaCySchema.outboundlinks_rel_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_rel_txt, outboundlinksRel);
// the relation flags of the outbound links must be evaluated from the outbound rel attributes;
// the previous code passed inboundlinksRel here (copy-paste from the inbound section above)
if (allAttr || contains(YaCySchema.outboundlinks_relflags_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_relflags_txt, relEval(outboundlinksRel));
if (allAttr || contains(YaCySchema.outboundlinks_text_txt)) addSolr(solrdoc, YaCySchema.outboundlinks_text_txt, outboundlinksText);
// charset
if (allAttr || contains(SolrField.charset_s)) addSolr(solrdoc, SolrField.charset_s, yacydoc.getCharset());
if (allAttr || contains(YaCySchema.charset_s)) addSolr(solrdoc, YaCySchema.charset_s, yacydoc.getCharset());
// coordinates
if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) {
if (allAttr || contains(SolrField.lon_coordinate)) addSolr(solrdoc, SolrField.lon_coordinate, yacydoc.lon());
if (allAttr || contains(SolrField.lat_coordinate)) addSolr(solrdoc, SolrField.lat_coordinate, yacydoc.lat());
if (allAttr || contains(YaCySchema.lon_coordinate)) addSolr(solrdoc, YaCySchema.lon_coordinate, yacydoc.lon());
if (allAttr || contains(YaCySchema.lat_coordinate)) addSolr(solrdoc, YaCySchema.lat_coordinate, yacydoc.lat());
}
if (allAttr || contains(SolrField.httpstatus_i)) addSolr(solrdoc, SolrField.httpstatus_i, header == null ? 200 : header.getStatusCode());
if (allAttr || contains(YaCySchema.httpstatus_i)) addSolr(solrdoc, YaCySchema.httpstatus_i, header == null ? 200 : header.getStatusCode());
// fields that are additionally in URIMetadataRow
if (allAttr || contains(SolrField.load_date_dt)) addSolr(solrdoc, SolrField.load_date_dt, metadata.loaddate());
if (allAttr || contains(SolrField.fresh_date_dt)) addSolr(solrdoc, SolrField.fresh_date_dt, metadata.freshdate());
if (allAttr || contains(SolrField.host_id_s)) addSolr(solrdoc, SolrField.host_id_s, metadata.hosthash());
if ((allAttr || contains(SolrField.referrer_id_txt)) && metadata.referrerHash() != null) addSolr(solrdoc, SolrField.referrer_id_txt, new String[]{ASCII.String(metadata.referrerHash())});
if (allAttr || contains(YaCySchema.load_date_dt)) addSolr(solrdoc, YaCySchema.load_date_dt, metadata.loaddate());
if (allAttr || contains(YaCySchema.fresh_date_dt)) addSolr(solrdoc, YaCySchema.fresh_date_dt, metadata.freshdate());
if (allAttr || contains(YaCySchema.host_id_s)) addSolr(solrdoc, YaCySchema.host_id_s, metadata.hosthash());
if ((allAttr || contains(YaCySchema.referrer_id_txt)) && metadata.referrerHash() != null) addSolr(solrdoc, YaCySchema.referrer_id_txt, new String[]{ASCII.String(metadata.referrerHash())});
//if (allAttr || contains(SolrField.md5_s)) addSolr(solrdoc, SolrField.md5_s, new byte[0]);
if (allAttr || contains(SolrField.publisher_t)) addSolr(solrdoc, SolrField.publisher_t, yacydoc.dc_publisher());
if ((allAttr || contains(SolrField.language_txt)) && metadata.language() != null) addSolr(solrdoc, SolrField.language_txt,new String[]{UTF8.String(metadata.language())});
if (allAttr || contains(SolrField.ranking_i)) addSolr(solrdoc, SolrField.ranking_i, metadata.ranking());
if (allAttr || contains(SolrField.size_i)) addSolr(solrdoc, SolrField.size_i, metadata.size());
if (allAttr || contains(SolrField.audiolinkscount_i)) addSolr(solrdoc, SolrField.audiolinkscount_i, yacydoc.getAudiolinks().size());
if (allAttr || contains(SolrField.videolinkscount_i)) addSolr(solrdoc, SolrField.videolinkscount_i, yacydoc.getVideolinks().size());
if (allAttr || contains(SolrField.applinkscount_i)) addSolr(solrdoc, SolrField.applinkscount_i, yacydoc.getApplinks().size());
if (allAttr || contains(YaCySchema.publisher_t)) addSolr(solrdoc, YaCySchema.publisher_t, yacydoc.dc_publisher());
if ((allAttr || contains(YaCySchema.language_txt)) && metadata.language() != null) addSolr(solrdoc, YaCySchema.language_txt,new String[]{UTF8.String(metadata.language())});
if (allAttr || contains(YaCySchema.ranking_i)) addSolr(solrdoc, YaCySchema.ranking_i, metadata.ranking());
if (allAttr || contains(YaCySchema.size_i)) addSolr(solrdoc, YaCySchema.size_i, metadata.size());
if (allAttr || contains(YaCySchema.audiolinkscount_i)) addSolr(solrdoc, YaCySchema.audiolinkscount_i, yacydoc.getAudiolinks().size());
if (allAttr || contains(YaCySchema.videolinkscount_i)) addSolr(solrdoc, YaCySchema.videolinkscount_i, yacydoc.getVideolinks().size());
if (allAttr || contains(YaCySchema.applinkscount_i)) addSolr(solrdoc, YaCySchema.applinkscount_i, yacydoc.getApplinks().size());
return solrdoc;
}
@ -634,39 +634,39 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
/**
 * Reads the document id from a Solr document.
 * The value is looked up under the Solr field name of the schema entry {@code id} and cast to String.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return the value stored in the id field, cast to String
 */
public String solrGetID(final SolrDocument solr) {
return (String) solr.getFieldValue(SolrField.id.getSolrFieldName());
return (String) solr.getFieldValue(YaCySchema.id.getSolrFieldName());
}
/**
 * Builds a DigestURI from the {@code sku} field of a Solr document.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return a DigestURI constructed from the String value of the sku field,
 *         or null if the constructor throws a MalformedURLException
 */
public DigestURI solrGetURL(final SolrDocument solr) {
try {
return new DigestURI((String) solr.getFieldValue(SolrField.sku.getSolrFieldName()));
return new DigestURI((String) solr.getFieldValue(YaCySchema.sku.getSolrFieldName()));
} catch (final MalformedURLException e) {
// the exception is not reported; callers only see the null result
return null;
}
}
/**
 * Reads the {@code title} field from a Solr document.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return the value stored in the title field, cast to String
 */
public String solrGetTitle(final SolrDocument solr) {
return (String) solr.getFieldValue(SolrField.title.getSolrFieldName());
return (String) solr.getFieldValue(YaCySchema.title.getSolrFieldName());
}
/**
 * Reads the {@code text_t} field from a Solr document.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return the value stored in the text_t field, cast to String
 */
public String solrGetText(final SolrDocument solr) {
return (String) solr.getFieldValue(SolrField.text_t.getSolrFieldName());
return (String) solr.getFieldValue(YaCySchema.text_t.getSolrFieldName());
}
/**
 * Reads the {@code author} field from a Solr document.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return the value stored in the author field, cast to String
 */
public String solrGetAuthor(final SolrDocument solr) {
return (String) solr.getFieldValue(SolrField.author.getSolrFieldName());
return (String) solr.getFieldValue(YaCySchema.author.getSolrFieldName());
}
/**
 * Reads the {@code description} field from a Solr document.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return the value stored in the description field, cast to String
 */
public String solrGetDescription(final SolrDocument solr) {
return (String) solr.getFieldValue(SolrField.description.getSolrFieldName());
return (String) solr.getFieldValue(YaCySchema.description.getSolrFieldName());
}
/**
 * Reads the {@code last_modified} field from a Solr document.
 * NOTE(review): this listing is a rename diff; the SolrField line is the removed variant, the YaCySchema line the added one.
 *
 * @param solr the Solr document to read from
 * @return the value stored in the last_modified field, cast to Date
 */
public Date solrGetDate(final SolrDocument solr) {
return (Date) solr.getFieldValue(SolrField.last_modified.getSolrFieldName());
return (Date) solr.getFieldValue(YaCySchema.last_modified.getSolrFieldName());
}
public Collection<String> solrGetKeywords(final SolrDocument solr) {
final Collection<Object> c = solr.getFieldValues(SolrField.keywords.getSolrFieldName());
final Collection<Object> c = solr.getFieldValues(YaCySchema.keywords.getSolrFieldName());
final ArrayList<String> a = new ArrayList<String>();
for (final Object s: c) {
a.add((String) s);
@ -683,20 +683,20 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
*/
public SolrDoc err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
// builds a new document from the URL itself plus the given fail reason and http status
// NOTE(review): this listing is a rename diff; each SolrField line is the removed variant of the YaCySchema line that follows it
final SolrDoc solrdoc = new SolrDoc();
// id is the URL hash as ASCII string, sku the normal form of the URL
addSolr(solrdoc, SolrField.id, ASCII.String(digestURI.hash()));
addSolr(solrdoc, SolrField.sku, digestURI.toNormalform(true, false));
addSolr(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash()));
addSolr(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
// ip and host are only written when the URL provides them (non-null)
final InetAddress address = digestURI.getInetAddress();
if (address != null) addSolr(solrdoc, SolrField.ip_s, address.getHostAddress());
if (digestURI.getHost() != null) addSolr(solrdoc, SolrField.host_s, digestURI.getHost());
if (address != null) addSolr(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (digestURI.getHost() != null) addSolr(solrdoc, YaCySchema.host_s, digestURI.getHost());
// path elements of link
final String path = digestURI.getPath();
if (path != null) {
// the path is split at "/" and stored only if the split yields at least one element
final String[] paths = path.split("/");
if (paths.length > 0) addSolr(solrdoc, SolrField.paths_txt, paths);
if (paths.length > 0) addSolr(solrdoc, YaCySchema.paths_txt, paths);
}
// the failure information is always written
addSolr(solrdoc, SolrField.failreason_t, failReason);
addSolr(solrdoc, SolrField.httpstatus_i, httpstatus);
addSolr(solrdoc, YaCySchema.failreason_t, failReason);
addSolr(solrdoc, YaCySchema.httpstatus_i, httpstatus);
return solrdoc;
}

View File

@ -24,9 +24,10 @@
package net.yacy.search.index;
import net.yacy.cora.services.federated.solr.Schema;
import net.yacy.cora.services.federated.solr.SolrType;
public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField {
public enum YaCySchema implements Schema {
id(SolrType.string, true, true, "primary key of document, the URL hash **mandatory field**"),
sku(SolrType.text_en_splitting_tight, true, true, false, true, "url of document"),
@ -121,7 +122,7 @@ public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField
ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"),
ext_title_val(SolrType.integer, true, true, true, "number of matching title expressions"),
failreason_t(SolrType.text_general, true, true, "fail reason if a page was not loaded. if the page was loaded then this field is empty"),
// values used additionally by URIMetadataRow
load_date_dt(SolrType.date, true, true, "time when resource was loaded"),
fresh_date_dt(SolrType.date, true, true, "date until resource shall be considered as fresh"),
@ -142,7 +143,7 @@ public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField
private boolean multiValued, omitNorms;
private String comment;
private SolrField(final SolrType type, final boolean indexed, final boolean stored, final String comment) {
private YaCySchema(final SolrType type, final boolean indexed, final boolean stored, final String comment) {
this.type = type;
this.indexed = indexed;
this.stored = stored;
@ -151,12 +152,12 @@ public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField
this.comment = comment;
}
/**
 * Constructor variant with an explicit multi-value flag.
 * Delegates to the four-argument constructor and then assigns {@code multiValued}.
 * NOTE(review): this listing is a rename diff; the SolrField signature is the removed variant, the YaCySchema signature the added one.
 *
 * @param type the Solr type of the field
 * @param indexed value for the indexed flag
 * @param stored value for the stored flag
 * @param multiValued value for the multiValued flag
 * @param comment descriptive text for the field
 */
private SolrField(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final String comment) {
private YaCySchema(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final String comment) {
this(type, indexed, stored, comment);
this.multiValued = multiValued;
}
/**
 * Constructor variant with explicit multi-value and omit-norms flags.
 * Delegates to the five-argument constructor and then assigns {@code omitNorms}.
 * NOTE(review): this listing is a rename diff; the SolrField signature is the removed variant, the YaCySchema signature the added one.
 *
 * @param type the Solr type of the field
 * @param indexed value for the indexed flag
 * @param stored value for the stored flag
 * @param multiValued value for the multiValued flag
 * @param omitNorms value for the omitNorms flag
 * @param comment descriptive text for the field
 */
private SolrField(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final boolean omitNorms, final String comment) {
private YaCySchema(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final boolean omitNorms, final String comment) {
this(type, indexed, stored, multiValued, comment);
this.omitNorms = omitNorms;
}

View File

@ -55,7 +55,7 @@ import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet;
@ -501,7 +501,7 @@ public class SnippetProcess {
if (this.solr != null) {
SolrDocument sd = null;
StringBuilder querystring = new StringBuilder(17);
querystring.append(SolrField.id.getSolrFieldName()).append(':').append('"').append(ASCII.String(page.hash())).append('"');
querystring.append(YaCySchema.id.getSolrFieldName()).append(':').append('"').append(ASCII.String(page.hash())).append('"');
SolrDocumentList sdl = null;
try {
sdl = this.solr.query(querystring.toString(), 0, 1);

View File

@ -30,7 +30,7 @@ import net.yacy.cora.services.federated.solr.AbstractSolrConnector;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.services.federated.solr.SolrDoc;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.SolrField;
import net.yacy.search.index.YaCySchema;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrDocument;
@ -158,12 +158,12 @@ public class EmbeddedSolrConnector extends AbstractSolrConnector implements Solr
try {
EmbeddedSolrConnector solr = new EmbeddedSolrConnector(storage, solr_config);
SolrDoc solrdoc = new SolrDoc();
solrdoc.addSolr(SolrField.id, "ABCD0000abcd");
solrdoc.addSolr(SolrField.title, "Lorem ipsum");
solrdoc.addSolr(SolrField.text_t, "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
solrdoc.addSolr(YaCySchema.id, "ABCD0000abcd");
solrdoc.addSolr(YaCySchema.title, "Lorem ipsum");
solrdoc.addSolr(YaCySchema.text_t, "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
solr.add(solrdoc);
SolrServlet.startServer("/solr", 8091, solr);
SolrDocumentList searchresult = solr.query(SolrField.text_t.name() + ":tempor", 0, 10);
SolrDocumentList searchresult = solr.query(YaCySchema.text_t.name() + ":tempor", 0, 10);
for (SolrDocument d : searchresult) {
System.out.println(d.toString());
}