mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- faster search: using different data structures that avoid multiple calculations
- no more table copy for error-eco table
- optional table copy for lurl-entries
- more abstractions (fewer single constant strings)
- better logging (using host names instead of IPs)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4459 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
8358652fa9
commit
bd63999801
|
@ -102,7 +102,7 @@ public class BlogComments {
|
||||||
}
|
}
|
||||||
|
|
||||||
String pagename = post.get("page", "blog_default");
|
String pagename = post.get("page", "blog_default");
|
||||||
String ip = post.get("CLIENTIP", "127.0.0.1");
|
String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
|
||||||
|
|
||||||
String StrAuthor = post.get("author", "anonymous");
|
String StrAuthor = post.get("author", "anonymous");
|
||||||
|
|
||||||
|
|
|
@ -105,9 +105,9 @@ public class CrawlProfileEditor_p {
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
selentry = (entry)it.next();
|
selentry = (entry)it.next();
|
||||||
if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
|
if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
|
||||||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
|
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) /*||
|
||||||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
|
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
|
||||||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
|
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA)*/)
|
||||||
continue;
|
continue;
|
||||||
prop.put("profiles_" + count + "_name", selentry.name());
|
prop.put("profiles_" + count + "_name", selentry.name());
|
||||||
prop.put("profiles_" + count + "_handle", selentry.handle());
|
prop.put("profiles_" + count + "_handle", selentry.handle());
|
||||||
|
|
|
@ -212,7 +212,7 @@ public final class Settings_p {
|
||||||
}
|
}
|
||||||
|
|
||||||
// clientIP
|
// clientIP
|
||||||
prop.putHTML("clientIP", (String) header.get("CLIENTIP", "<unknown>"), true); // read an artificial header addendum
|
prop.putHTML("clientIP", (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"), true); // read an artificial header addendum
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* seed upload settings
|
* seed upload settings
|
||||||
|
|
|
@ -50,7 +50,7 @@ public class TestApplet {
|
||||||
//File templatefile=filehandler.getOverlayedFile((String)post.get("url"));
|
//File templatefile=filehandler.getOverlayedFile((String)post.get("url"));
|
||||||
File classfile = httpdFileHandler.getOverlayedClass((String)post.get("url"));
|
File classfile = httpdFileHandler.getOverlayedClass((String)post.get("url"));
|
||||||
httpHeader header2=new httpHeader();
|
httpHeader header2=new httpHeader();
|
||||||
header2.put("CLIENTIP", "127.0.0.1");
|
header2.put(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
|
||||||
header2.put("PATH", post.get("url"));
|
header2.put("PATH", post.get("url"));
|
||||||
serverObjects tp=null;
|
serverObjects tp=null;
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -79,7 +79,7 @@ public class User{
|
||||||
prop.put("logged-in_identified-by", "2");
|
prop.put("logged-in_identified-by", "2");
|
||||||
//try via ip
|
//try via ip
|
||||||
if(entry == null){
|
if(entry == null){
|
||||||
entry=sb.userDB.ipAuth(((String)header.get("CLIENTIP", "xxxxxx")));
|
entry=sb.userDB.ipAuth(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx")));
|
||||||
if(entry != null){
|
if(entry != null){
|
||||||
prop.put("logged-in_identified-by", "0");
|
prop.put("logged-in_identified-by", "0");
|
||||||
}
|
}
|
||||||
|
@ -108,7 +108,7 @@ public class User{
|
||||||
//identified via form-login
|
//identified via form-login
|
||||||
//TODO: this does not work for a static admin, yet.
|
//TODO: this does not work for a static admin, yet.
|
||||||
}else if(post != null && post.containsKey("username") && post.containsKey("password")){
|
}else if(post != null && post.containsKey("username") && post.containsKey("password")){
|
||||||
//entry=sb.userDB.passwordAuth((String)post.get("username"), (String)post.get("password"), (String)header.get("CLIENTIP", "xxxxxx"));
|
//entry=sb.userDB.passwordAuth((String)post.get("username"), (String)post.get("password"), (String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx"));
|
||||||
String username=(String)post.get("username");
|
String username=(String)post.get("username");
|
||||||
String password=(String)post.get("password");
|
String password=(String)post.get("password");
|
||||||
|
|
||||||
|
@ -163,7 +163,7 @@ public class User{
|
||||||
if(post!=null && post.containsKey("logout")){
|
if(post!=null && post.containsKey("logout")){
|
||||||
prop.put("logged-in", "0");
|
prop.put("logged-in", "0");
|
||||||
if(entry != null){
|
if(entry != null){
|
||||||
entry.logout(((String)header.get("CLIENTIP", "xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie
|
entry.logout(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie
|
||||||
}else{
|
}else{
|
||||||
sb.userDB.adminLogout(userDB.getLoginToken(header.getHeaderCookies()));
|
sb.userDB.adminLogout(userDB.getLoginToken(header.getHeaderCookies()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,7 +72,7 @@ public class ViewImage {
|
||||||
|
|
||||||
String urlString = post.get("url", "");
|
String urlString = post.get("url", "");
|
||||||
String urlLicense = post.get("code", "");
|
String urlLicense = post.get("code", "");
|
||||||
boolean auth = ((String) header.get("CLIENTIP", "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights
|
boolean auth = ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights
|
||||||
|
|
||||||
yacyURL url = null;
|
yacyURL url = null;
|
||||||
if ((urlString.length() > 0) && (auth)) try {
|
if ((urlString.length() > 0) && (auth)) try {
|
||||||
|
|
|
@ -88,7 +88,7 @@ public class Wiki {
|
||||||
|
|
||||||
String access = switchboard.getConfig("WikiAccess", "admin");
|
String access = switchboard.getConfig("WikiAccess", "admin");
|
||||||
String pagename = post.get("page", "start");
|
String pagename = post.get("page", "start");
|
||||||
String ip = post.get("CLIENTIP", "127.0.0.1");
|
String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
|
||||||
String author = post.get("author", "anonymous");
|
String author = post.get("author", "anonymous");
|
||||||
if (author.equals("anonymous")) {
|
if (author.equals("anonymous")) {
|
||||||
author = wikiBoard.guessAuthor(ip);
|
author = wikiBoard.guessAuthor(ip);
|
||||||
|
|
|
@ -78,7 +78,7 @@ public class welcome {
|
||||||
prop.put("hostip", "Unknown Host Exception");
|
prop.put("hostip", "Unknown Host Exception");
|
||||||
}
|
}
|
||||||
prop.put("port", serverCore.getPortNr(env.getConfig("port","8080")));
|
prop.put("port", serverCore.getPortNr(env.getConfig("port","8080")));
|
||||||
prop.put("clientip", (String) header.get("CLIENTIP", ""));
|
prop.put("clientip", (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, ""));
|
||||||
|
|
||||||
final String peertype = (yacyCore.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_JUNIOR : yacyCore.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN);
|
final String peertype = (yacyCore.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_JUNIOR : yacyCore.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN);
|
||||||
final boolean senior = (peertype.equals(yacySeed.PEERTYPE_SENIOR)) || (peertype.equals(yacySeed.PEERTYPE_PRINCIPAL));
|
final boolean senior = (peertype.equals(yacySeed.PEERTYPE_SENIOR)) || (peertype.equals(yacySeed.PEERTYPE_PRINCIPAL));
|
||||||
|
|
|
@ -103,7 +103,7 @@ public final class hello {
|
||||||
// if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; }
|
// if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; }
|
||||||
|
|
||||||
// we easily know the caller's IP:
|
// we easily know the caller's IP:
|
||||||
final String clientip = (String) header.get("CLIENTIP", "<unknown>"); // read an artificial header addendum
|
final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum
|
||||||
InetAddress ias = serverDomains.dnsResolve(clientip);
|
InetAddress ias = serverDomains.dnsResolve(clientip);
|
||||||
if (ias == null) {
|
if (ias == null) {
|
||||||
prop.put("message", "cannot resolve your IP from your reported location " + clientip);
|
prop.put("message", "cannot resolve your IP from your reported location " + clientip);
|
||||||
|
|
|
@ -282,7 +282,7 @@ public final class search {
|
||||||
// prepare search statistics
|
// prepare search statistics
|
||||||
Long trackerHandle = new Long(System.currentTimeMillis());
|
Long trackerHandle = new Long(System.currentTimeMillis());
|
||||||
HashMap<String, Object> searchProfile = theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp, urlRetrievalAllTime, snippetComputationAllTime);
|
HashMap<String, Object> searchProfile = theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp, urlRetrievalAllTime, snippetComputationAllTime);
|
||||||
String client = (String) header.get("CLIENTIP");
|
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
|
||||||
searchProfile.put("host", client);
|
searchProfile.put("host", client);
|
||||||
yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false);
|
yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false);
|
||||||
searchProfile.put("peername", (remotepeer == null) ? "unknown" : remotepeer.getName());
|
searchProfile.put("peername", (remotepeer == null) ? "unknown" : remotepeer.getName());
|
||||||
|
|
|
@ -89,14 +89,14 @@ public final class transfer {
|
||||||
final yacySeed opeer = yacyCore.seedDB.get(ohash);
|
final yacySeed opeer = yacyCore.seedDB.get(ohash);
|
||||||
if (opeer == null) {
|
if (opeer == null) {
|
||||||
// reject unknown peers: this does not appear fair, but anonymous senders are dangerous
|
// reject unknown peers: this does not appear fair, but anonymous senders are dangerous
|
||||||
sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + ohash + "', current IP " + header.get("CLIENTIP", "unknown"));
|
sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + ohash + "', current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown"));
|
||||||
return prop;
|
return prop;
|
||||||
}
|
}
|
||||||
opeer.setLastSeenUTC();
|
opeer.setLastSeenUTC();
|
||||||
|
|
||||||
if (filename.indexOf("..") >= 0) {
|
if (filename.indexOf("..") >= 0) {
|
||||||
// reject paths that contain '..' because they are dangerous
|
// reject paths that contain '..' because they are dangerous
|
||||||
sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+ ", current IP " + header.get("CLIENTIP", "unknown"));
|
sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+ ", current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown"));
|
||||||
return prop;
|
return prop;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -257,7 +257,7 @@ public class yacysearch {
|
||||||
constraint,
|
constraint,
|
||||||
true);
|
true);
|
||||||
|
|
||||||
String client = (String) header.get("CLIENTIP"); // the search client who initiated the search
|
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
|
||||||
|
|
||||||
// tell all threads to do nothing for a specific time
|
// tell all threads to do nothing for a specific time
|
||||||
sb.intermissionAllThreads(10000);
|
sb.intermissionAllThreads(10000);
|
||||||
|
|
|
@ -155,7 +155,7 @@ public final class userDB {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
public Entry getUser(httpHeader header){
|
public Entry getUser(httpHeader header){
|
||||||
return getUser((String) header.get(httpHeader.AUTHORIZATION), (String)header.get("CLIENTIP"), header.getHeaderCookies());
|
return getUser((String) header.get(httpHeader.AUTHORIZATION), (String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP), header.getHeaderCookies());
|
||||||
}
|
}
|
||||||
public Entry getUser(String auth, String ip, String cookies){
|
public Entry getUser(String auth, String ip, String cookies){
|
||||||
Entry entry=null;
|
Entry entry=null;
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class httpSSI {
|
||||||
conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, path);
|
conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, path);
|
||||||
conProp.setProperty(httpHeader.CONNECTION_PROP_ARGS, args);
|
conProp.setProperty(httpHeader.CONNECTION_PROP_ARGS, args);
|
||||||
conProp.setProperty(httpHeader.CONNECTION_PROP_HTTP_VER, httpHeader.HTTP_VERSION_0_9);
|
conProp.setProperty(httpHeader.CONNECTION_PROP_HTTP_VER, httpHeader.HTTP_VERSION_0_9);
|
||||||
conProp.setProperty("CLIENTIP", "127.0.0.1");
|
conProp.setProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
|
||||||
header.put(httpHeader.AUTHORIZATION, authorization);
|
header.put(httpHeader.AUTHORIZATION, authorization);
|
||||||
httpdFileHandler.doGet(conProp, header, out);
|
httpdFileHandler.doGet(conProp, header, out);
|
||||||
}
|
}
|
||||||
|
|
|
@ -193,7 +193,7 @@ public final class httpd implements serverHandler {
|
||||||
public void initSession(serverCore.Session newsession) throws IOException {
|
public void initSession(serverCore.Session newsession) throws IOException {
|
||||||
this.session = newsession;
|
this.session = newsession;
|
||||||
this.userAddress = session.userAddress; // client InetAddress
|
this.userAddress = session.userAddress; // client InetAddress
|
||||||
this.clientIP = this.userAddress.getHostAddress();
|
this.clientIP = this.userAddress.getHostName();
|
||||||
if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost";
|
if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost";
|
||||||
if (this.clientIP.equals("0:0:0:0:0:0:0:1")) this.clientIP = "localhost";
|
if (this.clientIP.equals("0:0:0:0:0:0:0:1")) this.clientIP = "localhost";
|
||||||
if (this.clientIP.equals("127.0.0.1")) this.clientIP = "localhost";
|
if (this.clientIP.equals("127.0.0.1")) this.clientIP = "localhost";
|
||||||
|
@ -1147,7 +1147,7 @@ public final class httpd implements serverHandler {
|
||||||
// tp.put("host", serverCore.publicIP().getHostAddress());
|
// tp.put("host", serverCore.publicIP().getHostAddress());
|
||||||
// tp.put("port", switchboard.getConfig("port", "8080"));
|
// tp.put("port", switchboard.getConfig("port", "8080"));
|
||||||
|
|
||||||
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP,"127.0.0.1");
|
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
|
||||||
|
|
||||||
// check if ip is local ip address
|
// check if ip is local ip address
|
||||||
InetAddress hostAddress = serverDomains.dnsResolve(clientIP);
|
InetAddress hostAddress = serverDomains.dnsResolve(clientIP);
|
||||||
|
|
|
@ -303,13 +303,13 @@ public final class httpdFileHandler {
|
||||||
if ((path.substring(0,(pos==-1)?path.length():pos)).endsWith("_p") && (adminAccountBase64MD5.length() != 0)) {
|
if ((path.substring(0,(pos==-1)?path.length():pos)).endsWith("_p") && (adminAccountBase64MD5.length() != 0)) {
|
||||||
//authentication required
|
//authentication required
|
||||||
//userDB
|
//userDB
|
||||||
if(sb.userDB.hasAdminRight(authorization, conProp.getProperty("CLIENTIP"), requestHeader.getHeaderCookies())){
|
if(sb.userDB.hasAdminRight(authorization, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP), requestHeader.getHeaderCookies())){
|
||||||
//Authentication successful. remove brute-force flag
|
//Authentication successful. remove brute-force flag
|
||||||
serverCore.bfHost.remove(conProp.getProperty("CLIENTIP"));
|
serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
|
||||||
//static
|
//static
|
||||||
}else if(authorization != null && httpd.staticAdminAuthenticated(authorization.trim().substring(6), switchboard)==4){
|
}else if(authorization != null && httpd.staticAdminAuthenticated(authorization.trim().substring(6), switchboard)==4){
|
||||||
//Authentication successful. remove brute-force flag
|
//Authentication successful. remove brute-force flag
|
||||||
serverCore.bfHost.remove(conProp.getProperty("CLIENTIP"));
|
serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
|
||||||
//no auth
|
//no auth
|
||||||
}else if (authorization == null) {
|
}else if (authorization == null) {
|
||||||
// no authorization given in response. Ask for that
|
// no authorization given in response. Ask for that
|
||||||
|
@ -323,7 +323,7 @@ public final class httpdFileHandler {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
// a wrong authentication was given or the userDB user does not have admin access. Ask again
|
// a wrong authentication was given or the userDB user does not have admin access. Ask again
|
||||||
String clientIP = conProp.getProperty("CLIENTIP", "unknown-host");
|
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host");
|
||||||
serverLog.logInfo("HTTPD", "Wrong log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
|
serverLog.logInfo("HTTPD", "Wrong log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
|
||||||
Integer attempts = (Integer) serverCore.bfHost.get(clientIP);
|
Integer attempts = (Integer) serverCore.bfHost.get(clientIP);
|
||||||
if (attempts == null)
|
if (attempts == null)
|
||||||
|
@ -473,7 +473,7 @@ public final class httpdFileHandler {
|
||||||
// call an image-servlet to produce an on-the-fly - generated image
|
// call an image-servlet to produce an on-the-fly - generated image
|
||||||
Object img = null;
|
Object img = null;
|
||||||
try {
|
try {
|
||||||
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP"));
|
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
|
||||||
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
|
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
|
||||||
// in case that there are no args given, args = null or empty hashmap
|
// in case that there are no args given, args = null or empty hashmap
|
||||||
img = invokeServlet(targetClass, requestHeader, args);
|
img = invokeServlet(targetClass, requestHeader, args);
|
||||||
|
@ -527,7 +527,7 @@ public final class httpdFileHandler {
|
||||||
}
|
}
|
||||||
} else if ((targetClass != null) && (path.endsWith(".stream"))) {
|
} else if ((targetClass != null) && (path.endsWith(".stream"))) {
|
||||||
// call rewrite-class
|
// call rewrite-class
|
||||||
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP"));
|
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
|
||||||
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
|
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
|
||||||
//requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body);
|
//requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body);
|
||||||
//requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out);
|
//requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out);
|
||||||
|
@ -570,7 +570,7 @@ public final class httpdFileHandler {
|
||||||
} else {
|
} else {
|
||||||
// CGI-class: call the class to create a property for rewriting
|
// CGI-class: call the class to create a property for rewriting
|
||||||
try {
|
try {
|
||||||
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP"));
|
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
|
||||||
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
|
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
|
||||||
// in case that there are no args given, args = null or empty hashmap
|
// in case that there are no args given, args = null or empty hashmap
|
||||||
Object tmp = invokeServlet(targetClass, requestHeader, args);
|
Object tmp = invokeServlet(targetClass, requestHeader, args);
|
||||||
|
@ -586,7 +586,7 @@ public final class httpdFileHandler {
|
||||||
if (tp.containsKey(servletProperties.ACTION_AUTHENTICATE)) {
|
if (tp.containsKey(servletProperties.ACTION_AUTHENTICATE)) {
|
||||||
// handle brute-force protection
|
// handle brute-force protection
|
||||||
if (authorization != null) {
|
if (authorization != null) {
|
||||||
String clientIP = conProp.getProperty("CLIENTIP", "unknown-host");
|
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host");
|
||||||
serverLog.logInfo("HTTPD", "dynamic log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
|
serverLog.logInfo("HTTPD", "dynamic log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
|
||||||
Integer attempts = (Integer) serverCore.bfHost.get(clientIP);
|
Integer attempts = (Integer) serverCore.bfHost.get(clientIP);
|
||||||
if (attempts == null)
|
if (attempts == null)
|
||||||
|
|
|
@ -47,8 +47,6 @@ public interface indexRWIEntry {
|
||||||
|
|
||||||
public String urlHash();
|
public String urlHash();
|
||||||
|
|
||||||
public int quality();
|
|
||||||
|
|
||||||
public int virtualAge();
|
public int virtualAge();
|
||||||
|
|
||||||
public long lastModified();
|
public long lastModified();
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
|
|
||||||
package de.anomic.index;
|
package de.anomic.index;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -55,12 +56,13 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
|
||||||
this.maxdomcount = 0;
|
this.maxdomcount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void normalizeWith(indexContainer container) {
|
public ArrayList<indexRWIVarEntry> normalizeWith(indexContainer container) {
|
||||||
// normalize ranking: find minimum and maxiumum of separate ranking criteria
|
// normalize ranking: find minimum and maxiumum of separate ranking criteria
|
||||||
assert (container != null);
|
assert (container != null);
|
||||||
|
ArrayList<indexRWIVarEntry> result = null;
|
||||||
|
|
||||||
//long s0 = System.currentTimeMillis();
|
//long s0 = System.currentTimeMillis();
|
||||||
if ((processors > 1) && (container.size() > 10000)) {
|
if ((processors > 1) && (container.size() > 600)) {
|
||||||
// run minmax with two threads
|
// run minmax with two threads
|
||||||
int middle = container.size() / 2;
|
int middle = container.size() / 2;
|
||||||
minmaxfinder mmf0 = new minmaxfinder(container, 0, middle);
|
minmaxfinder mmf0 = new minmaxfinder(container, 0, middle);
|
||||||
|
@ -83,6 +85,8 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
|
||||||
entry = di.next();
|
entry = di.next();
|
||||||
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
|
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
|
||||||
}
|
}
|
||||||
|
result = mmf0.decodedEntries;
|
||||||
|
result.addAll(mmf1.decodedContainer());
|
||||||
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
|
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
|
||||||
//System.out.println("***DEBUG*** indexRWIEntry.Order (2-THREADED): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
|
//System.out.println("***DEBUG*** indexRWIEntry.Order (2-THREADED): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
|
||||||
} else if (container.size() > 0) {
|
} else if (container.size() > 0) {
|
||||||
|
@ -97,10 +101,12 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
|
||||||
entry = di.next();
|
entry = di.next();
|
||||||
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
|
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
|
||||||
}
|
}
|
||||||
|
result = mmf.decodedContainer();
|
||||||
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
|
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
|
||||||
//System.out.println("***DEBUG*** indexRWIEntry.Order (ONETHREAD): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
|
//System.out.println("***DEBUG*** indexRWIEntry.Order (ONETHREAD): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
|
||||||
}
|
}
|
||||||
if (this.doms.size() > 0) this.maxdomcount = this.doms.getMaxScore();
|
if (this.doms.size() > 0) this.maxdomcount = this.doms.getMaxScore();
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public kelondroOrder<indexRWIVarEntry> clone() {
|
public kelondroOrder<indexRWIVarEntry> clone() {
|
||||||
|
@ -179,6 +185,7 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
|
||||||
private int start, end;
|
private int start, end;
|
||||||
private HashMap<String, Integer> doms;
|
private HashMap<String, Integer> doms;
|
||||||
private Integer int1;
|
private Integer int1;
|
||||||
|
ArrayList<indexRWIVarEntry> decodedEntries;
|
||||||
|
|
||||||
public minmaxfinder(indexContainer container, int start /*including*/, int end /*excluding*/) {
|
public minmaxfinder(indexContainer container, int start /*including*/, int end /*excluding*/) {
|
||||||
this.container = container;
|
this.container = container;
|
||||||
|
@ -186,18 +193,20 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
|
||||||
this.end = end;
|
this.end = end;
|
||||||
this.doms = new HashMap<String, Integer>();
|
this.doms = new HashMap<String, Integer>();
|
||||||
this.int1 = new Integer(1);
|
this.int1 = new Integer(1);
|
||||||
|
this.decodedEntries = new ArrayList<indexRWIVarEntry>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void run() {
|
public void run() {
|
||||||
// find min/max to obtain limits for normalization
|
// find min/max to obtain limits for normalization
|
||||||
this.entryMin = null;
|
this.entryMin = null;
|
||||||
this.entryMax = null;
|
this.entryMax = null;
|
||||||
indexRWIRowEntry iEntry;
|
indexRWIVarEntry iEntry;
|
||||||
int p = this.start;
|
int p = this.start;
|
||||||
String dom;
|
String dom;
|
||||||
Integer count;
|
Integer count;
|
||||||
while (p < this.end) {
|
while (p < this.end) {
|
||||||
iEntry = new indexRWIRowEntry(container.get(p++));
|
iEntry = new indexRWIVarEntry(new indexRWIRowEntry(container.get(p++)));
|
||||||
|
this.decodedEntries.add(iEntry);
|
||||||
// find min/max
|
// find min/max
|
||||||
if (this.entryMin == null) this.entryMin = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.min(this.entryMin, iEntry);
|
if (this.entryMin == null) this.entryMin = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.min(this.entryMin, iEntry);
|
||||||
if (this.entryMax == null) this.entryMax = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.max(this.entryMax, iEntry);
|
if (this.entryMax == null) this.entryMax = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.max(this.entryMax, iEntry);
|
||||||
|
@ -212,6 +221,10 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<indexRWIVarEntry> decodedContainer() {
|
||||||
|
return this.decodedEntries;
|
||||||
|
}
|
||||||
|
|
||||||
public HashMap<String, Integer> domcount() {
|
public HashMap<String, Integer> domcount() {
|
||||||
return this.doms;
|
return this.doms;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,8 @@ public final class indexRWIRowEntry implements indexRWIEntry {
|
||||||
private static final int col_worddistance = 18; // i 1 initial zero; may be used as reserve: is filled during search
|
private static final int col_worddistance = 18; // i 1 initial zero; may be used as reserve: is filled during search
|
||||||
private static final int col_reserve = 19; // k 1 reserve
|
private static final int col_reserve = 19; // k 1 reserve
|
||||||
|
|
||||||
|
public double termFrequency;
|
||||||
|
|
||||||
private kelondroRow.Entry entry;
|
private kelondroRow.Entry entry;
|
||||||
|
|
||||||
public indexRWIRowEntry(String urlHash,
|
public indexRWIRowEntry(String urlHash,
|
||||||
|
@ -101,14 +103,14 @@ public final class indexRWIRowEntry implements indexRWIEntry {
|
||||||
int posinphrase, // position of word in its phrase
|
int posinphrase, // position of word in its phrase
|
||||||
int posofphrase, // number of the phrase where word appears
|
int posofphrase, // number of the phrase where word appears
|
||||||
int worddistance, // word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultanous search). If stored, this shows that the result was obtained by remote search
|
int worddistance, // word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultanous search). If stored, this shows that the result was obtained by remote search
|
||||||
int sizeOfPage, // # of bytes of the page TODO: not needed any more
|
|
||||||
long lastmodified, // last-modified time of the document where word appears
|
long lastmodified, // last-modified time of the document where word appears
|
||||||
long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
|
long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
|
||||||
String language, // (guessed) language of document
|
String language, // (guessed) language of document
|
||||||
char doctype, // type of document
|
char doctype, // type of document
|
||||||
int outlinksSame, // outlinks to same domain
|
int outlinksSame, // outlinks to same domain
|
||||||
int outlinksOther, // outlinks to other domain
|
int outlinksOther, // outlinks to other domain
|
||||||
kelondroBitfield flags // attributes to the url and to the word according the url
|
kelondroBitfield flags, // attributes to the url and to the word according the url
|
||||||
|
double termFrequency
|
||||||
) {
|
) {
|
||||||
|
|
||||||
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
|
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
|
||||||
|
@ -136,6 +138,7 @@ public final class indexRWIRowEntry implements indexRWIEntry {
|
||||||
this.entry.setCol(col_posofphrase, posofphrase);
|
this.entry.setCol(col_posofphrase, posofphrase);
|
||||||
this.entry.setCol(col_worddistance, worddistance);
|
this.entry.setCol(col_worddistance, worddistance);
|
||||||
this.entry.setCol(col_reserve, 0);
|
this.entry.setCol(col_reserve, 0);
|
||||||
|
this.termFrequency = termFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
public indexRWIRowEntry(String urlHash, String code) {
|
public indexRWIRowEntry(String urlHash, String code) {
|
||||||
|
@ -183,10 +186,6 @@ public final class indexRWIRowEntry implements indexRWIEntry {
|
||||||
return this.entry.getColString(col_urlhash, null);
|
return this.entry.getColString(col_urlhash, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int quality() {
|
|
||||||
return 0; // not used any more
|
|
||||||
}
|
|
||||||
|
|
||||||
public int virtualAge() {
|
public int virtualAge() {
|
||||||
return (int) this.entry.getColLong(col_lastModified); // this is the time in MicroDateDays format
|
return (int) this.entry.getColLong(col_lastModified); // this is the time in MicroDateDays format
|
||||||
}
|
}
|
||||||
|
@ -256,7 +255,8 @@ public final class indexRWIRowEntry implements indexRWIEntry {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double termFrequency() {
|
public double termFrequency() {
|
||||||
return (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
|
if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
|
||||||
|
return this.termFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
@ -288,18 +288,12 @@ public final class indexRWIRowEntry implements indexRWIEntry {
|
||||||
public boolean isNewer(indexRWIEntry other) {
|
public boolean isNewer(indexRWIEntry other) {
|
||||||
if (other == null) return true;
|
if (other == null) return true;
|
||||||
if (this.lastModified() > other.lastModified()) return true;
|
if (this.lastModified() > other.lastModified()) return true;
|
||||||
if (this.lastModified() == other.lastModified()) {
|
|
||||||
if (this.quality() > other.quality()) return true;
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isOlder(indexRWIEntry other) {
|
public boolean isOlder(indexRWIEntry other) {
|
||||||
if (other == null) return false;
|
if (other == null) return false;
|
||||||
if (this.lastModified() < other.lastModified()) return true;
|
if (this.lastModified() < other.lastModified()) return true;
|
||||||
if (this.lastModified() == other.lastModified()) {
|
|
||||||
if (this.quality() < other.quality()) return true;
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
public char type;
|
public char type;
|
||||||
public int hitcount, llocal, lother, phrasesintext, posintext,
|
public int hitcount, llocal, lother, phrasesintext, posintext,
|
||||||
posinphrase, posofphrase,
|
posinphrase, posofphrase,
|
||||||
quality, urlcomps, urllength, virtualAge,
|
urlcomps, urllength, virtualAge,
|
||||||
worddistance, wordsintext, wordsintitle;
|
worddistance, wordsintext, wordsintitle;
|
||||||
public double termFrequency;
|
public double termFrequency;
|
||||||
|
|
||||||
|
@ -55,7 +55,6 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
this.posintext = e.posintext();
|
this.posintext = e.posintext();
|
||||||
this.posinphrase = e.posinphrase();
|
this.posinphrase = e.posinphrase();
|
||||||
this.posofphrase = e.posofphrase();
|
this.posofphrase = e.posofphrase();
|
||||||
this.quality = e.quality();
|
|
||||||
this.urlcomps = e.urlcomps();
|
this.urlcomps = e.urlcomps();
|
||||||
this.urllength = e.urllength();
|
this.urllength = e.urllength();
|
||||||
this.virtualAge = e.virtualAge();
|
this.virtualAge = e.virtualAge();
|
||||||
|
@ -134,8 +133,28 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
return posofphrase;
|
return posofphrase;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int quality() {
|
private indexRWIRowEntry toRowEntry() {
|
||||||
return quality;
|
return new indexRWIRowEntry(
|
||||||
|
urlHash,
|
||||||
|
urllength, // byte-length of complete URL
|
||||||
|
urlcomps, // number of path components
|
||||||
|
wordsintitle, // length of description/length (longer are better?)
|
||||||
|
hitcount, // how often appears this word in the text
|
||||||
|
wordsintext, // total number of words
|
||||||
|
phrasesintext, // total number of phrases
|
||||||
|
posintext, // position of word in all words
|
||||||
|
posinphrase, // position of word in its phrase
|
||||||
|
posofphrase, // number of the phrase where word appears
|
||||||
|
worddistance, // word distance
|
||||||
|
lastModified, // last-modified time of the document where word appears
|
||||||
|
System.currentTimeMillis(), // update time;
|
||||||
|
language, // (guessed) language of document
|
||||||
|
type, // type of document
|
||||||
|
llocal, // outlinks to same domain
|
||||||
|
lother, // outlinks to other domain
|
||||||
|
flags, // attributes to the url and to the word according the url
|
||||||
|
termFrequency
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Entry toKelondroEntry() {
|
public Entry toKelondroEntry() {
|
||||||
|
@ -144,8 +163,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toPropertyForm() {
|
public String toPropertyForm() {
|
||||||
assert false; // should not be used
|
return toRowEntry().toPropertyForm();
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String urlHash() {
|
public String urlHash() {
|
||||||
|
@ -177,7 +195,8 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double termFrequency() {
|
public double termFrequency() {
|
||||||
return termFrequency;
|
if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
|
||||||
|
return this.termFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final void min(indexRWIVarEntry t, indexRWIEntry other) {
|
public static final void min(indexRWIVarEntry t, indexRWIEntry other) {
|
||||||
|
@ -187,7 +206,6 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
if (t.hitcount() > (v = other.hitcount())) t.hitcount = v;
|
if (t.hitcount() > (v = other.hitcount())) t.hitcount = v;
|
||||||
if (t.llocal() > (v = other.llocal())) t.llocal = v;
|
if (t.llocal() > (v = other.llocal())) t.llocal = v;
|
||||||
if (t.lother() > (v = other.lother())) t.lother = v;
|
if (t.lother() > (v = other.lother())) t.lother = v;
|
||||||
if (t.quality() > (v = other.quality())) t.quality = v;
|
|
||||||
if (t.virtualAge() > (v = other.virtualAge())) t.virtualAge = v;
|
if (t.virtualAge() > (v = other.virtualAge())) t.virtualAge = v;
|
||||||
if (t.wordsintext() > (v = other.wordsintext())) t.wordsintext = v;
|
if (t.wordsintext() > (v = other.wordsintext())) t.wordsintext = v;
|
||||||
if (t.phrasesintext() > (v = other.phrasesintext())) t.phrasesintext = v;
|
if (t.phrasesintext() > (v = other.phrasesintext())) t.phrasesintext = v;
|
||||||
|
@ -210,7 +228,6 @@ public class indexRWIVarEntry implements indexRWIEntry {
|
||||||
if (t.hitcount() < (v = other.hitcount())) t.hitcount = v;
|
if (t.hitcount() < (v = other.hitcount())) t.hitcount = v;
|
||||||
if (t.llocal() < (v = other.llocal())) t.llocal = v;
|
if (t.llocal() < (v = other.llocal())) t.llocal = v;
|
||||||
if (t.lother() < (v = other.lother())) t.lother = v;
|
if (t.lother() < (v = other.lother())) t.lother = v;
|
||||||
if (t.quality() < (v = other.quality())) t.quality = v;
|
|
||||||
if (t.virtualAge() < (v = other.virtualAge())) t.virtualAge = v;
|
if (t.virtualAge() < (v = other.virtualAge())) t.virtualAge = v;
|
||||||
if (t.wordsintext() < (v = other.wordsintext())) t.wordsintext = v;
|
if (t.wordsintext() < (v = other.wordsintext())) t.wordsintext = v;
|
||||||
if (t.phrasesintext() < (v = other.phrasesintext())) t.phrasesintext = v;
|
if (t.phrasesintext() < (v = other.phrasesintext())) t.phrasesintext = v;
|
||||||
|
|
|
@ -115,7 +115,7 @@ public class indexURLEntry {
|
||||||
|
|
||||||
private kelondroRow.Entry entry;
|
private kelondroRow.Entry entry;
|
||||||
private String snippet;
|
private String snippet;
|
||||||
private indexRWIRowEntry word; // this is only used if the url is transported via remote search requests
|
private indexRWIEntry word; // this is only used if the url is transported via remote search requests
|
||||||
private long ranking; // during generation of a search result this value is set
|
private long ranking; // during generation of a search result this value is set
|
||||||
|
|
||||||
public indexURLEntry(
|
public indexURLEntry(
|
||||||
|
@ -185,7 +185,7 @@ public class indexURLEntry {
|
||||||
return s.toString().getBytes();
|
return s.toString().getBytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
public indexURLEntry(kelondroRow.Entry entry, indexRWIRowEntry searchedWord, long ranking) {
|
public indexURLEntry(kelondroRow.Entry entry, indexRWIEntry searchedWord, long ranking) {
|
||||||
this.entry = entry;
|
this.entry = entry;
|
||||||
this.snippet = null;
|
this.snippet = null;
|
||||||
this.word = searchedWord;
|
this.word = searchedWord;
|
||||||
|
@ -391,7 +391,7 @@ public class indexURLEntry {
|
||||||
return snippet;
|
return snippet;
|
||||||
}
|
}
|
||||||
|
|
||||||
public indexRWIRowEntry word() {
|
public indexRWIEntry word() {
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ public class kelondroSplitTable implements kelondroIndex {
|
||||||
// this is a kelondroFlex table
|
// this is a kelondroFlex table
|
||||||
table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail));
|
table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail));
|
||||||
} else {
|
} else {
|
||||||
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
|
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
|
||||||
}
|
}
|
||||||
tables.put(date, table);
|
tables.put(date, table);
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,7 +66,7 @@ import java.util.LinkedList;
|
||||||
import de.anomic.data.htmlTools;
|
import de.anomic.data.htmlTools;
|
||||||
import de.anomic.http.httpc;
|
import de.anomic.http.httpc;
|
||||||
import de.anomic.http.httpc.response;
|
import de.anomic.http.httpc.response;
|
||||||
import de.anomic.index.indexRWIRowEntry;
|
import de.anomic.index.indexRWIEntry;
|
||||||
import de.anomic.index.indexURLEntry;
|
import de.anomic.index.indexURLEntry;
|
||||||
import de.anomic.kelondro.kelondroBase64Order;
|
import de.anomic.kelondro.kelondroBase64Order;
|
||||||
import de.anomic.kelondro.kelondroCache;
|
import de.anomic.kelondro.kelondroCache;
|
||||||
|
@ -153,7 +153,7 @@ public final class plasmaCrawlLURL {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized indexURLEntry load(String urlHash, indexRWIRowEntry searchedWord, long ranking) {
|
public synchronized indexURLEntry load(String urlHash, indexRWIEntry searchedWord, long ranking) {
|
||||||
// generates a plasmaLURLEntry using the url hash
|
// generates a plasmaLURLEntry using the url hash
|
||||||
// to speed up the access, the url-hashes are buffered
|
// to speed up the access, the url-hashes are buffered
|
||||||
// in the hash cache.
|
// in the hash cache.
|
||||||
|
|
|
@ -69,7 +69,7 @@ public class plasmaCrawlZURL {
|
||||||
if (f.isDirectory()) kelondroFlexTable.delete(cachePath, tablename); else f.delete();
|
if (f.isDirectory()) kelondroFlexTable.delete(cachePath, tablename); else f.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
urlIndex = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
|
urlIndex = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
|
||||||
//urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true);
|
//urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ package de.anomic.plasma;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -40,6 +41,7 @@ import de.anomic.index.indexContainer;
|
||||||
import de.anomic.index.indexRWIEntry;
|
import de.anomic.index.indexRWIEntry;
|
||||||
import de.anomic.index.indexRWIEntryOrder;
|
import de.anomic.index.indexRWIEntryOrder;
|
||||||
import de.anomic.index.indexRWIRowEntry;
|
import de.anomic.index.indexRWIRowEntry;
|
||||||
|
import de.anomic.index.indexRWIVarEntry;
|
||||||
import de.anomic.index.indexURLEntry;
|
import de.anomic.index.indexURLEntry;
|
||||||
import de.anomic.kelondro.kelondroBinSearch;
|
import de.anomic.kelondro.kelondroBinSearch;
|
||||||
import de.anomic.kelondro.kelondroMScoreCluster;
|
import de.anomic.kelondro.kelondroMScoreCluster;
|
||||||
|
@ -52,8 +54,8 @@ public final class plasmaSearchRankingProcess {
|
||||||
public static kelondroBinSearch[] ybrTables = null; // block-rank tables
|
public static kelondroBinSearch[] ybrTables = null; // block-rank tables
|
||||||
private static boolean useYBR = true;
|
private static boolean useYBR = true;
|
||||||
|
|
||||||
private TreeMap<Object, indexRWIRowEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
|
private TreeMap<Object, indexRWIVarEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
|
||||||
private HashMap<String, TreeMap<Object, indexRWIRowEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
|
private HashMap<String, TreeMap<Object, indexRWIVarEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
|
||||||
private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
|
private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
|
||||||
private plasmaSearchQuery query;
|
private plasmaSearchQuery query;
|
||||||
private int sortorder;
|
private int sortorder;
|
||||||
|
@ -72,8 +74,8 @@ public final class plasmaSearchRankingProcess {
|
||||||
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
||||||
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
||||||
this.localSearchContainerMaps = null;
|
this.localSearchContainerMaps = null;
|
||||||
this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
|
this.sortedRWIEntries = new TreeMap<Object, indexRWIVarEntry>();
|
||||||
this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIRowEntry>>();
|
this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIVarEntry>>();
|
||||||
this.handover = new HashMap<String, String>();
|
this.handover = new HashMap<String, String>();
|
||||||
this.order = null;
|
this.order = null;
|
||||||
this.query = query;
|
this.query = query;
|
||||||
|
@ -132,11 +134,11 @@ public final class plasmaSearchRankingProcess {
|
||||||
this.remote_indexCount += index.size();
|
this.remote_indexCount += index.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
indexRWIRowEntry ientry;
|
indexRWIVarEntry ientry;
|
||||||
indexURLEntry uentry;
|
indexURLEntry uentry;
|
||||||
String u;
|
String u;
|
||||||
loop: while (en.hasNext()) {
|
loop: while (en.hasNext()) {
|
||||||
ientry = en.next();
|
ientry = new indexRWIVarEntry(en.next());
|
||||||
|
|
||||||
// check constraints
|
// check constraints
|
||||||
if (!testFlags(ientry)) continue loop;
|
if (!testFlags(ientry)) continue loop;
|
||||||
|
@ -183,13 +185,13 @@ public final class plasmaSearchRankingProcess {
|
||||||
if (this.order == null) {
|
if (this.order == null) {
|
||||||
this.order = new indexRWIEntryOrder(query.ranking);
|
this.order = new indexRWIEntryOrder(query.ranking);
|
||||||
}
|
}
|
||||||
this.order.normalizeWith(index);
|
ArrayList<indexRWIVarEntry> decodedEntries = this.order.normalizeWith(index);
|
||||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer));
|
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer));
|
||||||
|
|
||||||
// normalize entries and get ranking
|
// normalize entries and get ranking
|
||||||
timer = System.currentTimeMillis();
|
timer = System.currentTimeMillis();
|
||||||
Iterator<indexRWIRowEntry> i = index.entries();
|
Iterator<indexRWIVarEntry> i = decodedEntries.iterator();
|
||||||
indexRWIRowEntry iEntry, l;
|
indexRWIVarEntry iEntry, l;
|
||||||
long biggestEntry = 0;
|
long biggestEntry = 0;
|
||||||
//long s0 = System.currentTimeMillis();
|
//long s0 = System.currentTimeMillis();
|
||||||
Long r;
|
Long r;
|
||||||
|
@ -272,8 +274,8 @@ public final class plasmaSearchRankingProcess {
|
||||||
private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean skipDoubleDom) {
|
private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean skipDoubleDom) {
|
||||||
// returns the best entry from the current RWI list and removes this entry from the list
|
// returns the best entry from the current RWI list and removes this entry from the list
|
||||||
Object bestEntry;
|
Object bestEntry;
|
||||||
TreeMap<Object, indexRWIRowEntry> m;
|
TreeMap<Object, indexRWIVarEntry> m;
|
||||||
indexRWIRowEntry rwi;
|
indexRWIVarEntry rwi;
|
||||||
while (sortedRWIEntries.size() > 0) {
|
while (sortedRWIEntries.size() > 0) {
|
||||||
bestEntry = sortedRWIEntries.firstKey();
|
bestEntry = sortedRWIEntries.firstKey();
|
||||||
rwi = sortedRWIEntries.remove(bestEntry);
|
rwi = sortedRWIEntries.remove(bestEntry);
|
||||||
|
@ -283,7 +285,7 @@ public final class plasmaSearchRankingProcess {
|
||||||
m = this.doubleDomCache.get(domhash);
|
m = this.doubleDomCache.get(domhash);
|
||||||
if (m == null) {
|
if (m == null) {
|
||||||
// first appearance of dom
|
// first appearance of dom
|
||||||
m = new TreeMap<Object, indexRWIRowEntry>();
|
m = new TreeMap<Object, indexRWIVarEntry>();
|
||||||
this.doubleDomCache.put(domhash, m);
|
this.doubleDomCache.put(domhash, m);
|
||||||
return new Object[]{bestEntry, rwi};
|
return new Object[]{bestEntry, rwi};
|
||||||
}
|
}
|
||||||
|
@ -292,10 +294,10 @@ public final class plasmaSearchRankingProcess {
|
||||||
}
|
}
|
||||||
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
|
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
|
||||||
// find best entry from all caches
|
// find best entry from all caches
|
||||||
Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator();
|
Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator();
|
||||||
bestEntry = null;
|
bestEntry = null;
|
||||||
Object o;
|
Object o;
|
||||||
indexRWIRowEntry bestrwi = null;
|
indexRWIVarEntry bestrwi = null;
|
||||||
while (i.hasNext()) {
|
while (i.hasNext()) {
|
||||||
m = i.next();
|
m = i.next();
|
||||||
if (m.size() == 0) continue;
|
if (m.size() == 0) continue;
|
||||||
|
@ -331,7 +333,7 @@ public final class plasmaSearchRankingProcess {
|
||||||
while ((sortedRWIEntries.size() > 0) || (size() > 0)) {
|
while ((sortedRWIEntries.size() > 0) || (size() > 0)) {
|
||||||
Object[] obrwi = bestRWI(skipDoubleDom);
|
Object[] obrwi = bestRWI(skipDoubleDom);
|
||||||
Object bestEntry = obrwi[0];
|
Object bestEntry = obrwi[0];
|
||||||
indexRWIRowEntry ientry = (indexRWIRowEntry) obrwi[1];
|
indexRWIVarEntry ientry = (indexRWIVarEntry) obrwi[1];
|
||||||
long ranking = (bestEntry instanceof Long) ? ((Long) bestEntry).longValue() : 0;
|
long ranking = (bestEntry instanceof Long) ? ((Long) bestEntry).longValue() : 0;
|
||||||
indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(), ientry, ranking);
|
indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(), ientry, ranking);
|
||||||
if (u != null) {
|
if (u != null) {
|
||||||
|
@ -347,7 +349,7 @@ public final class plasmaSearchRankingProcess {
|
||||||
public synchronized int size() {
|
public synchronized int size() {
|
||||||
//assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
|
//assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
|
||||||
int c = sortedRWIEntries.size();
|
int c = sortedRWIEntries.size();
|
||||||
Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator();
|
Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator();
|
||||||
while (i.hasNext()) c += i.next().size();
|
while (i.hasNext()) c += i.next().size();
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
|
@ -414,7 +414,7 @@ public class plasmaSnippetCache {
|
||||||
resInfo = entry.getDocumentInfo();
|
resInfo = entry.getDocumentInfo();
|
||||||
|
|
||||||
// read resource body (if it is there)
|
// read resource body (if it is there)
|
||||||
byte []resourceArray = entry.cacheArray();
|
byte[] resourceArray = entry.cacheArray();
|
||||||
if (resourceArray != null) {
|
if (resourceArray != null) {
|
||||||
resContent = new ByteArrayInputStream(resourceArray);
|
resContent = new ByteArrayInputStream(resourceArray);
|
||||||
resContentLength = resourceArray.length;
|
resContentLength = resourceArray.length;
|
||||||
|
|
|
@ -906,7 +906,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
File networkUnitDefinitionFile = new File(rootPath, networkUnitDefinition);
|
File networkUnitDefinitionFile = (networkUnitDefinition.startsWith("/")) ? new File(networkUnitDefinition) : new File(rootPath, networkUnitDefinition);
|
||||||
if (networkUnitDefinitionFile.exists()) {
|
if (networkUnitDefinitionFile.exists()) {
|
||||||
initProps = serverFileUtils.loadHashMap(networkUnitDefinitionFile);
|
initProps = serverFileUtils.loadHashMap(networkUnitDefinitionFile);
|
||||||
this.setConfig(initProps);
|
this.setConfig(initProps);
|
||||||
|
@ -2348,14 +2348,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
||||||
wordStat.posInPhrase,
|
wordStat.posInPhrase,
|
||||||
wordStat.numOfPhrase,
|
wordStat.numOfPhrase,
|
||||||
0,
|
0,
|
||||||
newEntry.size(),
|
|
||||||
docDate.getTime(),
|
docDate.getTime(),
|
||||||
System.currentTimeMillis(),
|
System.currentTimeMillis(),
|
||||||
language,
|
language,
|
||||||
doctype,
|
doctype,
|
||||||
ioLinks[0].intValue(),
|
ioLinks[0].intValue(),
|
||||||
ioLinks[1].intValue(),
|
ioLinks[1].intValue(),
|
||||||
condenser.RESULT_FLAGS
|
condenser.RESULT_FLAGS,
|
||||||
|
0.0
|
||||||
);
|
);
|
||||||
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
|
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
|
||||||
wordIdxContainer.add(wordIdxEntry);
|
wordIdxContainer.add(wordIdxEntry);
|
||||||
|
@ -2573,10 +2573,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
||||||
if (authorization.length() > 256) return 0;
|
if (authorization.length() > 256) return 0;
|
||||||
|
|
||||||
// authorization by encoded password, only for localhost access
|
// authorization by encoded password, only for localhost access
|
||||||
if ((((String) header.get("CLIENTIP", "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost
|
if ((((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost
|
||||||
|
|
||||||
// authorization by hit in userDB
|
// authorization by hit in userDB
|
||||||
if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get("CLIENTIP", "")), header.getHeaderCookies())) return 4; //return, because 4=max
|
if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")), header.getHeaderCookies())) return 4; //return, because 4=max
|
||||||
|
|
||||||
// authorization with admin keyword in configuration
|
// authorization with admin keyword in configuration
|
||||||
return httpd.staticAdminAuthenticated(authorization, this);
|
return httpd.staticAdminAuthenticated(authorization, this);
|
||||||
|
|
|
@ -314,13 +314,13 @@ public final class plasmaWordIndex implements indexRI {
|
||||||
wprop.posInPhrase,
|
wprop.posInPhrase,
|
||||||
wprop.numOfPhrase,
|
wprop.numOfPhrase,
|
||||||
0,
|
0,
|
||||||
size,
|
|
||||||
urlModified.getTime(),
|
urlModified.getTime(),
|
||||||
System.currentTimeMillis(),
|
System.currentTimeMillis(),
|
||||||
language,
|
language,
|
||||||
doctype,
|
doctype,
|
||||||
outlinksSame, outlinksOther,
|
outlinksSame, outlinksOther,
|
||||||
wprop.flags);
|
wprop.flags,
|
||||||
|
0.0);
|
||||||
addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false);
|
addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false);
|
||||||
wordCount++;
|
wordCount++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user