more performance hacks

this makes YaCy search results VERY fast for all verify=false search cases
and it enhances the search speed also for all other snippet-fetch cases.
With this change my peer performed 100 Queries Per Second (!!!) while doing 10 queries simultaneously (!!!)
in an intranet index of 20000 URLs on my 16-core Mac

Check this yourself by doing:
cd bin
./searchtestmulti.sh
after finishing the run, divide 1000 by the given time per query (which is the qps for one thread)
and then multiply again by 10 (because 10 search threads have been started)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7231 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2010-10-09 08:55:57 +00:00
parent b8aee6d402
commit 0d363a94d7
15 changed files with 128 additions and 71 deletions

View File

@ -100,7 +100,6 @@ public class yacysearch {
String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
String querystring = originalquerystring.replace('+', ' ');
CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFFRESH : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
final serverObjects prop = new serverObjects();
// get segment
@ -167,7 +166,7 @@ public class yacysearch {
// collect search attributes
boolean newsearch = post.hasValue("query") && post.hasValue("former") && !post.get("query","").equalsIgnoreCase(post.get("former","")); //new search term
int itemsPerPage = Math.min((authenticated) ? (snippetFetchStrategy.isAllowedToFetchOnline() ? 100 : 1000) : (snippetFetchStrategy.isAllowedToFetchOnline() ? 10 : 100), post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
int itemsPerPage = Math.min((authenticated) ? (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 100 : 1000) : (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 10 : 100), post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
int offset = (newsearch) ? 0 : post.getInt("startRecord", post.getInt("offset", 0));
int newcount;
@ -234,7 +233,7 @@ public class yacysearch {
boolean block = false;
if (Domains.matchesList(client, sb.networkBlacklist)) {
global = false;
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
if (snippetFetchStrategy != null) snippetFetchStrategy = null;
block = true;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
} else if (Domains.matchesList(client, sb.networkWhitelist)) {
@ -254,9 +253,9 @@ public class yacysearch {
}
}
// protection against too many remote server snippet loads (protects traffic on server)
if (snippetFetchStrategy.isAllowedToFetchOnline()) {
if (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline()) {
if (accInTenMinutes >= 20 || accInOneMinute >= 4 || accInThreeSeconds >= 1) {
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
snippetFetchStrategy = null;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInOneSecond + "/1s, " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed remote snippet loading");
}
}
@ -554,19 +553,7 @@ public class yacysearch {
suggestion = meanIt.next();
prop.put("didYouMean_suggestions_"+meanCount+"_word", suggestion);
prop.put("didYouMean_suggestions_"+meanCount+"_url",
"/yacysearch.html" + "?display=" + display +
"&query=" + suggestion +
"&maximumRecords="+ theQuery.displayResults() +
"&startRecord=" + (0 * theQuery.displayResults()) +
"&resource=" + ((theQuery.isLocal()) ? "local" : "global") +
"&verify=" + (theQuery.snippetCacheStrategy.mustBeOffline() ? "false" : "true") +
"&nav=" + theQuery.navigators +
"&urlmaskfilter=" + originalUrlMask.toString() +
"&prefermaskfilter=" + theQuery.prefer.toString() +
"&cat=href&constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
"&contentdom=" + theQuery.contentdom() +
"&former=" + theQuery.queryString(true) +
"&meanCount=" + meanMax
QueryParams.navurl("html", 0, display, theQuery, suggestion, originalUrlMask.toString(), theQuery.navigators)
);
prop.put("didYouMean_suggestions_"+meanCount+"_sep","|");
meanCount++;
@ -624,7 +611,7 @@ public class yacysearch {
resnav.append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" />&nbsp;");
} else {
resnav.append("<a id=\"prevpage\" href=\"");
resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, originalUrlMask, null, navigation));
resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, null, originalUrlMask, navigation));
resnav.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a>&nbsp;");
}
final int numberofpages = Math.min(10, Math.max(1 + thispage, 1 + ((theSearch.getRankingResult().getLocalIndexCount() < 11) ? Math.max(30, theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize()) : theSearch.getRankingResult().getLocalIndexCount()) / theQuery.displayResults()));
@ -637,7 +624,7 @@ public class yacysearch {
resnav.append("\" width=\"16\" height=\"16\" />&nbsp;");
} else {
resnav.append("<a href=\"");
resnav.append(QueryParams.navurl("html", i, display, theQuery, originalUrlMask, null, navigation));
resnav.append(QueryParams.navurl("html", i, display, theQuery, null, originalUrlMask, navigation));
resnav.append("\"><img src=\"env/grafics/navd");
resnav.append(i + 1);
resnav.append(".gif\" alt=\"page");
@ -649,7 +636,7 @@ public class yacysearch {
resnav.append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />");
} else {
resnav.append("<a id=\"nextpage\" href=\"");
resnav.append(QueryParams.navurl("html", thispage + 1, display, theQuery, originalUrlMask, null, navigation));
resnav.append(QueryParams.navurl("html", thispage + 1, display, theQuery, null, originalUrlMask, navigation));
resnav.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
}
String resnavs = resnav.toString();
@ -705,7 +692,7 @@ public class yacysearch {
prop.putHTML("prefermaskfilter", prefermask);
prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", (constraint == null) ? "" : constraint.exportB64());
prop.put("verify", snippetFetchStrategy.toName());
prop.put("verify", snippetFetchStrategy == null ? "false" : snippetFetchStrategy.toName());
prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text")));
prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0);
prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0);

View File

@ -72,8 +72,8 @@ public class yacysearchtrailer {
for (i = 0; i < Math.min(10, namespaceNavigator.size()); i++) {
entry = namespaceNavigator.get(i);
prop.put("nav-namespace_element_" + i + "_name", entry.name);
prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask.toString(), "inurl:" + entry.name, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask.toString(), "inurl:" + entry.name, theQuery.navigators));
prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-namespace_element_" + i + "_count", entry.count);
prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + entry.name);
prop.put("nav-namespace_element_" + i + "_nl", 1);
@ -94,8 +94,8 @@ public class yacysearchtrailer {
for (i = 0; i < Math.min(10, hostNavigator.size()); i++) {
entry = hostNavigator.get(i);
prop.put("nav-domains_element_" + i + "_name", entry.name);
prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask.toString(), "site:" + entry.name, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask.toString(), "site:" + entry.name, theQuery.navigators));
prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-domains_element_" + i + "_count", entry.count);
prop.put("nav-domains_element_" + i + "_modifier", "site:" + entry.name);
prop.put("nav-domains_element_" + i + "_nl", 1);
@ -118,8 +118,8 @@ public class yacysearchtrailer {
entry = authorNavigator.get(i);
anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name.replace(" ", "+") + "'";
prop.put("nav-authors_element_" + i + "_name", entry.name);
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask.toString(), anav, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-authors_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask.toString(), anav, theQuery.navigators));
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-authors_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-authors_element_" + i + "_count", entry.count);
prop.put("nav-authors_element_" + i + "_modifier", "author:'" + entry.name + "'");
prop.put("nav-authors_element_" + i + "_nl", 1);
@ -144,8 +144,8 @@ public class yacysearchtrailer {
if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break;
if (e != null && e.name != null) {
prop.putHTML("nav-topics_element_" + i + "_name", e.name);
prop.put("nav-topics_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask.toString(), e.name, theQuery.navigators) + "\">" + e.name + " (" + e.count + ")</a>");
prop.putJSON("nav-topics_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask.toString(), e.name, theQuery.navigators));
prop.put("nav-topics_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + e.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + e.name + " (" + e.count + ")</a>");
prop.putJSON("nav-topics_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + e.name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-topics_element_" + i + "_count", e.count);
prop.put("nav-topics_element_" + i + "_modifier", e.name);
prop.put("nav-topics_element_" + i + "_nl", (iter.hasNext() && i < MAX_TOPWORDS) ? 1 : 0);

View File

@ -316,12 +316,13 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return NOCACHE;
}
public static CacheStrategy parse(String name) {
if (name == null) return null;
if (name.equals("nocache")) return NOCACHE;
if (name.equals("iffresh")) return IFFRESH;
if (name.equals("ifexist")) return IFEXIST;
if (name.equals("cacheonly")) return CACHEONLY;
if (name.equals("true")) return IFFRESH;
if (name.equals("false")) return CACHEONLY;
if (name.equals("false")) return null;
return null;
}
public String toName() {

View File

@ -60,6 +60,8 @@ public final class QueryParams {
public static final int SEARCHDOM_GLOBALDHT = 3;
public static final int SEARCHDOM_GLOBALALL = 4;
private static final String ampersand = "&amp;";
public static enum FetchMode {
NO_FETCH_NO_VERIFY,
FETCH_BUT_ACCEPT_OFFLINE_OR_USE_CACHE,
@ -137,7 +139,7 @@ public final class QueryParams {
this.domMaxTargets = 0;
this.constraint = constraint;
this.allofconstraint = false;
this.snippetCacheStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
this.snippetCacheStrategy = null;
this.host = null;
this.sitehash = null;
this.authorhash = null;
@ -453,9 +455,9 @@ public final class QueryParams {
* @param addToQuery
* @return
*/
public static String navurl(final String ext, final int page, final int display, final QueryParams theQuery, final String originalUrlMask, final String addToQuery, final String nav) {
final String ampersand = "&amp;";
public static String navurl(
final String ext, final int page, final int display, final QueryParams theQuery,
String newQueryString, final String originalUrlMask, final String nav) {
final StringBuilder sb = new StringBuilder();
sb.append("/yacysearch.");
@ -465,8 +467,7 @@ public final class QueryParams {
sb.append(ampersand);
sb.append("query=");
sb.append(theQuery.queryStringForUrl());
sb.append((addToQuery == null) ? "" : "+" + addToQuery);
sb.append(newQueryString == null ? theQuery.queryStringForUrl() : newQueryString);
sb.append(ampersand);
sb.append("maximumRecords=");
@ -482,7 +483,7 @@ public final class QueryParams {
sb.append(ampersand);
sb.append("verify=");
sb.append(theQuery.snippetCacheStrategy.mustBeOffline() ? "false" : "true");
sb.append(theQuery.snippetCacheStrategy == null ? "false" : theQuery.snippetCacheStrategy.toName());
sb.append(ampersand);
sb.append("nav=");

View File

@ -34,6 +34,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import net.yacy.document.Condenser;
import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
@ -155,7 +156,7 @@ public class ReferenceOrder {
if (count == null) {
doms0.put(dom, int1);
} else {
doms0.put(dom, Integer.valueOf(count.intValue() + 1));
doms0.put(dom, LargeNumberCache.valueOf(count.intValue() + 1));
}
}

View File

@ -43,7 +43,6 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlProfile.CacheStrategy;
import de.anomic.search.MediaSnippet;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.graphics.ProfilingGraph;
@ -187,7 +186,7 @@ public class ResultFetcher {
if (failedURLs.has(page.hash())) continue;
loops++;
final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
final ResultEntry resultEntry = fetchSnippet(page, cacheStrategy); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used
//if (result.contains(resultEntry)) continue;

View File

@ -36,6 +36,7 @@ import java.util.TreeSet;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -178,7 +179,7 @@ public final class SearchEvent {
mindhtdistance = l;
IAneardhthash = wordhash;
}
IACount.put(wordhash, Integer.valueOf(container.size()));
IACount.put(wordhash, LargeNumberCache.valueOf(container.size()));
IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
}
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false);

View File

@ -762,16 +762,22 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
defaultPort = true;
}
final String urlPath = this.getFile(excludeReference, removeSessionID);
if (defaultPort) {
return
this.protocol + "://" +
((this.getHost() == null) ? "" : ((this.userInfo != null) ? (this.userInfo + "@") : ("")) + this.getHost().toLowerCase()) +
urlPath;
StringBuilder u = new StringBuilder(80);
u.append(this.protocol);
u.append("://");
if (this.getHost() != null) {
if (this.userInfo != null) {
u.append(this.userInfo);
u.append("@");
}
return this.protocol + "://" +
((this.userInfo != null) ? (this.userInfo + "@") : ("")) +
this.getHost().toLowerCase() + ((defaultPort) ? ("") : (":" + this.port)) + urlPath;
u.append(this.getHost().toLowerCase());
}
if (!defaultPort) {
u.append(":");
u.append(this.port);
}
u.append(urlPath);
return u.toString();
}
public int hashCode() {

View File

@ -30,6 +30,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
@ -87,6 +88,11 @@ public final class Condenser {
public static final int flag_cat_hasapp = 23; // the page refers to (at least one) application file
private final static int numlength = 5;
private final static NumberFormat intStringFormatter = NumberFormat.getIntegerInstance();
static {
intStringFormatter.setMinimumIntegerDigits(numlength);
intStringFormatter.setMaximumIntegerDigits(numlength);
}
//private Properties analysis;
private Map<String, Word> words; // a string (the words) to (indexWord) - relation
@ -97,7 +103,7 @@ public final class Condenser {
public int RESULT_NUMB_SENTENCES = -1;
public int RESULT_DIFF_SENTENCES = -1;
public Bitfield RESULT_FLAGS = new Bitfield(4);
Identificator languageIdentificator;
private Identificator languageIdentificator;
public Condenser(
final Document document,
@ -268,12 +274,6 @@ public final class Condenser {
return this.languageIdentificator.getLanguage();
}
public String intString(final int number, final int length) {
String s = Integer.toString(number);
while (s.length() < length) s = "0" + s;
return s;
}
private void createCondensement(final InputStream is) throws UnsupportedEncodingException {
final HashSet<String> currsentwords = new HashSet<String>();
StringBuilder sentence = new StringBuilder(100);
@ -357,7 +357,7 @@ public final class Condenser {
}
words.put(word, wsp);
// we now have the unique handle of the word, put it into the sentence:
sentence.append(intString(wordHandle, numlength));
sentence.append(intStringFormatter.format(wordHandle));
wordInSentenceCounter++;
}
}
@ -389,7 +389,7 @@ public final class Condenser {
wc = (sentence.length() - 1) / numlength;
s = new String[wc + 2];
psp = sentences.get(sentence);
s[0] = intString(psp.occurrences(), numlength); // number of occurrences of this sentence
s[0] = intStringFormatter.format(psp.occurrences()); // number of occurrences of this sentence
s[1] = sentence.substring(0, 1); // the termination symbol of this sentence
for (int i = 0; i < wc; i++) {
k = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1);
@ -422,8 +422,8 @@ public final class Condenser {
idx = it1.next().intValue(); // number of a sentence
s = (String[]) orderedSentences[idx];
for (int j = 2; j < s.length; j++) {
if (s[j].equals(intString(wsp.posInText, numlength)))
s[j] = intString(wsp1.posInText, numlength);
if (s[j].equals(intStringFormatter.format(wsp.posInText)))
s[j] = intStringFormatter.format(wsp1.posInText);
}
orderedSentences[idx] = s;
}
@ -479,7 +479,7 @@ public final class Condenser {
hash = Word.word2hash(word.toString());
// don't overwrite old values, that leads to too far word distances
oldpos = map.put(hash, Integer.valueOf(pos));
oldpos = map.put(hash, LargeNumberCache.valueOf(pos));
if (oldpos != null) map.put(hash, oldpos);
pos += word.length() + 1;

View File

@ -0,0 +1,57 @@
/**
* LargeNumberCache.java
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 09.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document;
/**
 * A LargeNumberCache can be used wherever {@code Integer.valueOf(int)} is used.
 * The {@link Integer} class by default only caches values from -128 to +127,
 * which is not enough for the parser to organize word positions in texts.
 * This class keeps pre-boxed instances for the range
 * {@code 0 .. integerCacheLimit - 1}, so repeated requests for those values
 * return the same object instead of allocating a new one.
 */
public class LargeNumberCache {

    /** Exclusive upper bound of the cached range; 0 up to this value minus one is cached. */
    private static final int integerCacheLimit = 3000;

    /** Pre-boxed instances; the element at index {@code i} holds the value {@code i}. */
    private static final Integer[] integerCache;

    // fill the cache once when the class is initialized
    static {
        integerCache = new Integer[integerCacheLimit];
        for (int i = 0; i < integerCache.length; i++) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor
            integerCache[i] = Integer.valueOf(i);
        }
    }

    /**
     * Returns an Integer instance representing the specified int value.
     * Values inside the cached range are served from this class' own table;
     * all other values (negative, or at or above the cache limit) are
     * delegated to {@link Integer#valueOf(int)}.
     *
     * @param i an int value.
     * @return an Integer instance representing i.
     */
    public final static Integer valueOf(final int i) {
        // out-of-range values are not held here; let the JDK box them
        if (i < 0 || i >= integerCacheLimit) {
            return Integer.valueOf(i);
        }
        return integerCache[i];
    }
}

View File

@ -48,7 +48,7 @@ public class Phrase {
}
public void check(final int i) {
hash.add(Integer.valueOf(i));
hash.add(LargeNumberCache.valueOf(i));
}

View File

@ -43,7 +43,8 @@ public class SnippetExtractor {
Integer pos;
TreeSet<Integer> positions;
int linenumber = 0;
for (StringBuilder sentence: sentences) {
int fullmatchcounter = 0;
lookup: for (StringBuilder sentence: sentences) {
hs = Condenser.hashSentence(sentence.toString());
positions = new TreeSet<Integer>();
for (byte[] word: queryhashes) {
@ -61,6 +62,8 @@ public class SnippetExtractor {
if (positions.size() > 0) {
order.put(Long.valueOf(-100000000L * (linenumber == 0 ? 1 : 0) + 10000000L * positions.size() + 1000000L * worddistance + 100000L * linelengthKey(sentence.length(), maxLength) - 10000L * linenumber + uniqCounter--), sentence);
if (order.size() > 5) order.remove(order.firstEntry().getKey());
if (positions.size() == queryhashes.size()) fullmatchcounter++;
if (fullmatchcounter >= 3) break lookup;
}
linenumber++;
}

View File

@ -92,7 +92,7 @@ public class swfParser extends AbstractParser implements Parser {
while ((urlStart = contents.indexOf("http://",urlEnd)) >= 0){
urlEnd = contents.indexOf(linebreak,urlStart);
url = contents.substring(urlStart,urlEnd);
urlnr = (Integer.valueOf(++urls)).toString();
urlnr = Integer.toString(++urls).toString();
anchors.put(new MultiProtocolURI(url), urlnr);
contents = contents.substring(0,urlStart)+contents.substring(urlEnd);
}

View File

@ -33,6 +33,7 @@ import java.util.Set;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
@ -54,7 +55,7 @@ public class Word {
*/
public static final int commonHashLength = 12;
private static final int hashCacheSize = Math.max(10000, Math.min(100000, (int) (MemoryControl.available() / 20000L)));
private static final int hashCacheSize = Math.max(100000, Math.min(10000000, (int) (MemoryControl.available() / 20000L)));
private static final ARC<String, byte[]> hashCache = new ConcurrentARC<String, byte[]>(hashCacheSize, Runtime.getRuntime().availableProcessors() + 1);
// object carries statistics for words and sentences
@ -83,7 +84,7 @@ public class Word {
}
public void check(final int i) {
phrases.add(Integer.valueOf(i));
phrases.add(LargeNumberCache.valueOf(i));
}
public Iterator<Integer> phrases() {

View File

@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log;
public class Digest {
private final static int digestThreads = Runtime.getRuntime().availableProcessors() + 1;
private final static int digestThreads = Runtime.getRuntime().availableProcessors() * 2 + 1;
public static BlockingQueue<MessageDigest> digestPool = new ArrayBlockingQueue<MessageDigest>(digestThreads);
static {
for (int i = 0; i < digestThreads; i++)