Mirror of https://github.com/yacy/yacy_search_server.git (synced 2024-09-19 00:01:41 +02:00)

commit 4c013d9088 (parent 78d6d6ca06)

    more UTF8 getBytes() performance hacks

    git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7649 6c8d7289-2bf4-0310-a012-ef5d649a1542
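The pattern in every hunk below is the same: a call of the form s.getBytes() is replaced by UTF8.getBytes(s), and an import of net.yacy.cora.document.UTF8 is added where the class did not already use it. The usual motivation for such a change: String.getBytes() without an argument encodes with the platform default charset (so the result depends on the JVM's locale settings), and String.getBytes("UTF-8") pays a charset-name lookup and forces a checked UnsupportedEncodingException at every call site. A helper that holds one cached Charset gives deterministic UTF-8 bytes and skips the lookup in hot paths. The actual YaCy class is not reproduced in this diff; the following is only a minimal sketch, under the assumption that the helper caches the charset and offers the getBytes/String pair seen in the hunks below:

import java.nio.charset.Charset;

// Hypothetical sketch in the spirit of net.yacy.cora.document.UTF8 -- not the real YaCy source.
public final class UTF8 {

    // one cached Charset: no per-call charset-name lookup, no platform-default surprises
    private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");

    private UTF8() {} // static utility class, no instances

    // String -> bytes, always UTF-8, no checked UnsupportedEncodingException to handle
    public static byte[] getBytes(final String s) {
        if (s == null) return null;
        return s.getBytes(UTF8_CHARSET);
    }

    // bytes -> String, the inverse used elsewhere in the diff as UTF8.String(...)
    public static String String(final byte[] b) {
        if (b == null) return null;
        return new String(b, UTF8_CHARSET);
    }
}

With such a helper, a call like key.getBytes() simply becomes UTF8.getBytes(key); the affected call sites follow.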
@@ -455,8 +455,8 @@
<!-- packing all files into a gzipped tar -->
<tarfileset dir="${release_main}" prefix="${releaseFileParentDir}/" dirmode="${accessRightsDir}" mode="${accessRightsExecutable}">
<include name="**/*.sh"/>
<include name="**/*.command"/>
<include name="**/*.sh"/>
<include name="**/*.command"/>
</tarfileset>
<tarfileset dir="${release_main}" prefix="${releaseFileParentDir}/" dirmode="${accessRightsDir}" mode="${accessRightsFile}" >
<include name="**/*"/>
@@ -529,7 +529,7 @@
</fileset>
</copy>
<!-- copy language statistics files -->
<!-- copy shell scripts from bin/ -->
<!-- i'm not sure, if this is consistent with the debian policy -->
<!-- but for /usr/bin or /usr/lib we need an extra environment variable -->
<copy todir="${DESTDIR}/usr/share/yacy/bin">
@@ -107,6 +107,8 @@ public class yacysearch_location {
if (search_publisher) words.append(message.getCopyright().trim()).append(space);
if (search_creator) words.append(message.getAuthor().trim()).append(space);
String subject = "";
+ assert message != null;
+ assert message.getSubject() != null;
for (String s: message.getSubject()) subject += s.trim() + space;
if (search_subject) words.append(subject).append(space);
String[] wordlist = words.toString().trim().split(space);
@@ -406,7 +406,7 @@ public class Balancer {
// at this point we must check if the crawlEntry has relevance because the crawl profile still exists
// if not: return null. A calling method must handle the null value and try again
- final CrawlProfile profileEntry = cs.getActive(crawlEntry.profileHandle().getBytes());
+ final CrawlProfile profileEntry = cs.getActive(UTF8.getBytes(crawlEntry.profileHandle()));
if (profileEntry == null) {
Log.logWarning("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
return null;
@@ -242,7 +242,7 @@ public class CrawlQueues {
log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
return true;
}
- CrawlProfile profile = sb.crawler.getActive(profileHandle.getBytes());
+ CrawlProfile profile = sb.crawler.getActive(UTF8.getBytes(profileHandle));
if (profile == null) {
log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
return true;
@@ -284,7 +284,7 @@ public class CrawlQueues {
* @return
*/
private void load(Request urlEntry, final String stats, final String profileHandle) {
- final CrawlProfile profile = sb.crawler.getActive(profileHandle.getBytes());
+ final CrawlProfile profile = sb.crawler.getActive(UTF8.getBytes(profileHandle));
if (profile != null) {
// check if the protocol is supported
@@ -467,7 +467,7 @@ public class CrawlQueues {
// stack url
if (sb.getLog().isFinest()) sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
sb.crawlStacker.enqueueEntry(new Request(
- hash.getBytes(),
+ UTF8.getBytes(hash),
url,
(referrer == null) ? null : referrer.hash(),
item.getDescription(),
@@ -578,7 +578,7 @@ public class CrawlQueues {
//if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
errorURL.push(
this.request,
- sb.peers.mySeed().hash.getBytes(),
+ UTF8.getBytes(sb.peers.mySeed().hash),
new Date(),
1,
"denied by robots.txt");
@@ -593,7 +593,7 @@ public class CrawlQueues {
try {
request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
- final CrawlProfile e = sb.crawler.getActive(request.profileHandle().getBytes());
+ final CrawlProfile e = sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
Response response = sb.loader.load(request, e == null ? CrawlProfile.CacheStrategy.IFEXIST : e.cacheStrategy(), maxFileSize, true);
if (response == null) {
request.setStatus("error", WorkflowJob.STATUS_FINISHED);
@@ -614,7 +614,7 @@ public class CrawlQueues {
if (result != null) {
errorURL.push(
this.request,
- sb.peers.mySeed().hash.getBytes(),
+ UTF8.getBytes(sb.peers.mySeed().hash),
new Date(),
1,
"cannot load: " + result);
@@ -626,7 +626,7 @@ public class CrawlQueues {
} catch (final Exception e) {
errorURL.push(
this.request,
- sb.peers.mySeed().hash.getBytes(),
+ UTF8.getBytes(sb.peers.mySeed().hash),
new Date(),
1,
e.getMessage() + " - in worker");
@@ -201,7 +201,7 @@ public final class CrawlStacker {
// if the url was rejected we store it into the error URL db
if (rejectReason != null) {
- nextQueue.errorURL.push(entry, peers.mySeed().hash.getBytes(), new Date(), 1, rejectReason);
+ nextQueue.errorURL.push(entry, UTF8.getBytes(peers.mySeed().hash), new Date(), 1, rejectReason);
}
} catch (final Exception e) {
CrawlStacker.this.log.logWarning("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);
@@ -327,7 +327,7 @@ public final class CrawlStacker {
// returns null if successful, a reason string if not successful
//this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
- final CrawlProfile profile = crawler.getActive(entry.profileHandle().getBytes());
+ final CrawlProfile profile = crawler.getActive(UTF8.getBytes(entry.profileHandle()));
String error;
if (profile == null) {
error = "LOST STACKER PROFILE HANDLE '" + entry.profileHandle() + "' for URL " + entry.url();
@@ -339,7 +339,7 @@ public final class CrawlStacker {
if (error != null) return error;
// store information
- final boolean local = Base64Order.enhancedCoder.equal(entry.initiator(), peers.mySeed().hash.getBytes());
+ final boolean local = Base64Order.enhancedCoder.equal(entry.initiator(), UTF8.getBytes(peers.mySeed().hash));
final boolean proxy = (entry.initiator() == null || entry.initiator().length == 0 || UTF8.String(entry.initiator()).equals("------------")) && profile.handle().equals(crawler.defaultProxyProfile.handle());
final boolean remote = profile.handle().equals(crawler.defaultRemoteProfile.handle());
final boolean global =
@@ -31,6 +31,7 @@ import java.io.IOException;
import java.util.Map;
import java.util.Set;
+ import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.logging.Log;
@@ -214,44 +215,44 @@ public final class CrawlSwitchboard {
true,
false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, true, true, true,
CrawlProfile.CacheStrategy.IFFRESH);
- this.profilesActiveCrawls.put(this.defaultProxyProfile.handle().getBytes(), this.defaultProxyProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultProxyProfile.handle()), this.defaultProxyProfile);
}
if (this.defaultRemoteProfile == null) {
// generate new default entry for remote crawling
this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
-1, -1, true, true, true, false, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
- this.profilesActiveCrawls.put(this.defaultRemoteProfile.handle().getBytes(), this.defaultRemoteProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultRemoteProfile.handle()), this.defaultRemoteProfile);
}
if (this.defaultTextSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
- this.profilesActiveCrawls.put(this.defaultTextSnippetLocalProfile.handle().getBytes(), this.defaultTextSnippetLocalProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()), this.defaultTextSnippetLocalProfile);
}
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
- this.profilesActiveCrawls.put(this.defaultTextSnippetGlobalProfile.handle().getBytes(), this.defaultTextSnippetGlobalProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()), this.defaultTextSnippetGlobalProfile);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CrawlProfile.CacheStrategy.IFEXIST);
if (this.defaultMediaSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
- this.profilesActiveCrawls.put(this.defaultMediaSnippetLocalProfile.handle().getBytes(), this.defaultMediaSnippetLocalProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), this.defaultMediaSnippetLocalProfile);
}
if (this.defaultMediaSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
- this.profilesActiveCrawls.put(this.defaultMediaSnippetGlobalProfile.handle().getBytes(), this.defaultMediaSnippetGlobalProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), this.defaultMediaSnippetGlobalProfile);
}
if (this.defaultSurrogateProfile == null) {
// generate new default entry for surrogate parsing
this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, true, true, false, CrawlProfile.CacheStrategy.NOCACHE);
- this.profilesActiveCrawls.put(this.defaultSurrogateProfile.handle().getBytes(), this.defaultSurrogateProfile);
+ this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultSurrogateProfile.handle()), this.defaultSurrogateProfile);
}
}
@@ -285,7 +286,7 @@ public final class CrawlSwitchboard {
(entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) ||
(entry.name().equals(CRAWL_PROFILE_SURROGATE)))) {
CrawlProfile p = new CrawlProfile(entry);
- profilesPassiveCrawls.put(p.handle().getBytes(), p);
+ profilesPassiveCrawls.put(UTF8.getBytes(p.handle()), p);
profilesActiveCrawls.remove(handle);
hasDoneSomething = true;
}
@@ -31,6 +31,7 @@ import java.util.Date;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
+ import net.yacy.cora.document.UTF8;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ComparableARC;
import net.yacy.document.Parser.Failure;
@@ -108,7 +109,7 @@ public class RSSLoader extends Thread {
int lastAvg = rssRow.get("avg_upd_per_day", 0);
long thisAvg = 1000 * 60 * 60 * 24 / deltaTime * loadCount;
long nextAvg = lastAvg == 0 ? thisAvg : (thisAvg + lastAvg * 2) / 3;
- rssRow.put("url", url.toNormalform(true, false).getBytes());
+ rssRow.put("url", UTF8.getBytes(url.toNormalform(true, false)));
rssRow.put("title", feed.getChannel().getTitle());
rssRow.put("last_load_date", new Date());
rssRow.put("last_load_count", loadCount);
@@ -140,7 +141,7 @@ public class RSSLoader extends Thread {
// store pk of api table into rss table to show that the entry has been recorded
assert pk != null;
Tables.Data rssRow = new Tables.Data();
- rssRow.put("url", url.toNormalform(true, false).getBytes());
+ rssRow.put("url", UTF8.getBytes(url.toNormalform(true, false)));
rssRow.put("title", feed.getChannel().getTitle());
rssRow.put("api_pk", pk);
try {
@@ -38,19 +38,20 @@ public class ResourceObserver {
public static final Log log = new Log("RESOURCE OBSERVER");
// return values for available disk/memory
- private static final int LOW = 0;
- private static final int MEDIUM = 1;
- private static final int HIGH = 2;
+ public enum Space implements Comparable<Space> {
+ LOW, MEDIUM, HIGH; // according to the order of the definition, LOW is smaller than MEDIUM and MEDIUM is smaller than HIGH
+ }
private final Switchboard sb;
private final File path; // path to check
- private int normalizedDiskFree = HIGH;
- private int normalizedMemoryFree = HIGH;
+ private Space normalizedDiskFree = Space.HIGH;
+ private Space normalizedMemoryFree = Space.HIGH;
public ResourceObserver(final Switchboard sb) {
this.sb = sb;
this.path = sb.getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, "");
log.logInfo("path for disc space measurement: " + this.path);
}
public static void initThread() {
@@ -68,9 +69,9 @@ public class ResourceObserver {
normalizedDiskFree = getNormalizedDiskFree();
normalizedMemoryFree = getNormalizedMemoryFree();
- if (normalizedDiskFree < HIGH || normalizedMemoryFree < HIGH) {
+ if (normalizedDiskFree.compareTo(Space.HIGH) < 0 || normalizedMemoryFree.compareTo(Space.HIGH) < 0 ) {
- if (normalizedDiskFree < HIGH) { // pause crawls
+ if (normalizedDiskFree.compareTo(Space.HIGH) < 0 ) { // pause crawls
if (!sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
log.logInfo("pausing local crawls");
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
@@ -81,7 +82,7 @@ public class ResourceObserver {
}
}
- if ((normalizedDiskFree == LOW || normalizedMemoryFree < HIGH) && sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) {
+ if ((normalizedDiskFree == Space.LOW || normalizedMemoryFree.compareTo(Space.HIGH) < 0) && sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) {
log.logInfo("disabling index receive");
sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false);
sb.peers.mySeed().setFlagAcceptRemoteIndex(false);
@@ -108,38 +109,38 @@ public class ResourceObserver {
* <li><code>LOW</code> if lower than hardlimit disk space is available</li>
* </ul>
*/
- private int getNormalizedDiskFree() {
+ private Space getNormalizedDiskFree() {
final long currentSpace = getUsableSpace(this.path);
- if(currentSpace < 1L) return HIGH;
- int ret = HIGH;
+ if (currentSpace < 1L) return Space.HIGH;
+ Space ret = Space.HIGH;
if (currentSpace < getMinFreeDiskSpace()) {
log.logWarning("Volume " + this.path.toString() + ": free space (" + (currentSpace / 1024 / 1024) + " MB) is too low (< " + (getMinFreeDiskSpace() / 1024 / 1024) + " MB)");
- ret = MEDIUM;
+ ret = Space.MEDIUM;
}
if (currentSpace < getMinFreeDiskSpace_hardlimit()) {
- ret = LOW;
+ ret = Space.LOW;
}
return ret;
}
- private int getNormalizedMemoryFree() {
- if(!MemoryControl.getDHTallowed()) return LOW;
- return HIGH;
+ private Space getNormalizedMemoryFree() {
+ if(!MemoryControl.getDHTallowed()) return Space.LOW;
+ return Space.HIGH;
}
/**
* @return <code>true</code> if disk space is available
*/
public boolean getDiskAvailable() {
- return normalizedDiskFree == HIGH;
+ return normalizedDiskFree == Space.HIGH;
}
/**
* @return <code>true</code> if memory is available
*/
public boolean getMemoryAvailable() {
- return normalizedMemoryFree == HIGH;
+ return normalizedMemoryFree == Space.HIGH;
}
/**
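Besides the getBytes() rewrite, the hunks above also replace the ResourceObserver's int constants LOW/MEDIUM/HIGH with the enum Space. Java enum constants are ordered by declaration order, so the old numeric comparisons such as normalizedDiskFree < HIGH can be expressed with compareTo, which is exactly what the new code does. A tiny illustration of that ordering, not taken from the YaCy sources:

// Illustration only: enum constants compare by declaration order,
// so compareTo(Space.HIGH) < 0 means "less than HIGH", i.e. LOW or MEDIUM.
enum Space { LOW, MEDIUM, HIGH }

class SpaceOrderDemo {
    public static void main(String[] args) {
        Space free = Space.MEDIUM;
        System.out.println(free.compareTo(Space.HIGH) < 0);        // true: MEDIUM is below HIGH
        System.out.println(Space.LOW.compareTo(Space.MEDIUM) < 0); // true: LOW is the smallest
    }
}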
@@ -221,7 +221,7 @@ public final class ResultURLs {
public static void main(final String[] args) {
try {
final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/");
- final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0f, 0.0f, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de".getBytes(), 0, 0, 0, 0, 0, 0);
+ final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0f, 0.0f, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), UTF8.getBytes("de"), 0, 0, 0, 0, 0, 0);
EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING;
System.out.println("valid test:\n=======");
// add
@@ -125,7 +125,7 @@ public class RobotsEntry {
pathListStr.append(element)
.append(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR);
}
- this.mem.put(ALLOW_PATH_LIST, pathListStr.substring(0,pathListStr.length()-1).getBytes());
+ this.mem.put(ALLOW_PATH_LIST, UTF8.getBytes(pathListStr.substring(0,pathListStr.length()-1)));
}
if (disallowPathList != null && !disallowPathList.isEmpty()) {
@@ -136,7 +136,7 @@ public class RobotsEntry {
pathListStr.append(element)
.append(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR);
}
- this.mem.put(DISALLOW_PATH_LIST,pathListStr.substring(0, pathListStr.length()-1).getBytes());
+ this.mem.put(DISALLOW_PATH_LIST, UTF8.getBytes(pathListStr.substring(0, pathListStr.length()-1)));
}
}
@@ -149,7 +149,7 @@ public class RobotsEntry {
}
public Map<String, byte[]> getMem() {
- if (!this.mem.containsKey(HOST_NAME)) this.mem.put(HOST_NAME, this.hostName.getBytes());
+ if (!this.mem.containsKey(HOST_NAME)) this.mem.put(HOST_NAME, UTF8.getBytes(this.hostName));
return this.mem;
}
@@ -184,7 +184,7 @@ public class RobotsEntry {
public void setLoadedDate(final Date newLoadedDate) {
if (newLoadedDate != null) {
- this.mem.put(LOADED_DATE, Long.toString(newLoadedDate.getTime()).getBytes());
+ this.mem.put(LOADED_DATE, UTF8.getBytes(Long.toString(newLoadedDate.getTime())));
}
}
@@ -28,6 +28,7 @@ package de.anomic.crawler;
import java.net.MalformedURLException;
import java.util.Date;
+ import net.yacy.cora.document.UTF8;
import net.yacy.document.parser.sitemapParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@@ -89,7 +90,7 @@ public class SitemapImporter extends Thread {
// URL needs to crawled
this.sb.crawlStacker.enqueueEntry(new Request(
- this.sb.peers.mySeed().hash.getBytes(),
+ UTF8.getBytes(this.sb.peers.mySeed().hash),
url,
null, // this.siteMapURL.toString(),
entry.url(),
@@ -33,6 +33,7 @@ import java.util.Date;
import java.util.Iterator;
import java.util.concurrent.ConcurrentLinkedQueue;
+ import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Index;
@@ -198,7 +199,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
newrow.setCol(1, entry.executor);
newrow.setCol(2, entry.workdate.getTime());
newrow.setCol(3, entry.workcount);
- newrow.setCol(4, entry.anycause.getBytes());
+ newrow.setCol(4, UTF8.getBytes(entry.anycause));
newrow.setCol(5, entry.bentry.toRow().bytes());
try {
if (urlIndex != null) urlIndex.put(newrow);
@@ -295,7 +296,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
private boolean error = false;
private kiter(final boolean up, final String firstHash) throws IOException {
- i = urlIndex.rows(up, (firstHash == null) ? null : firstHash.getBytes());
+ i = urlIndex.rows(up, (firstHash == null) ? null : UTF8.getBytes(firstHash));
error = false;
}
@@ -85,7 +85,7 @@ public class BlogBoard {
* @return true if the database contains the element, else false
*/
public boolean contains(final String key) {
- return database.containsKey(key.getBytes());
+ return database.containsKey(UTF8.getBytes(key));
}
public void close() {
@@ -126,7 +126,7 @@ public class BlogBoard {
public String writeBlogEntry(final BlogEntry page) {
String ret = null;
try {
- database.insert(page.key.getBytes(), page.record);
+ database.insert(UTF8.getBytes(page.key), page.record);
ret = page.key;
} catch (IOException ex) {
Log.logException(ex);
@@ -144,7 +144,7 @@ public class BlogBoard {
final String normalized = normalize(key);
Map<String, String> record;
try {
- record = base.get(normalized.substring(0, Math.min(normalized.length(), KEY_LENGTH)).getBytes());
+ record = base.get(UTF8.getBytes(normalized.substring(0, Math.min(normalized.length(), KEY_LENGTH))));
} catch (final IOException e) {
Log.logException(e);
record = null;
@@ -153,7 +153,7 @@ public class BlogBoard {
record = null;
}
return (record == null) ?
- newEntry(key, new byte[0], "anonymous".getBytes(), "127.0.0.1", new Date(), new byte[0], null, null) :
+ newEntry(key, new byte[0], UTF8.getBytes("anonymous"), "127.0.0.1", new Date(), new byte[0], null, null) :
new BlogEntry(key, record);
}
@@ -229,7 +229,7 @@ public class BlogBoard {
public void deleteBlogEntry(final String key) {
try {
- database.delete(normalize(key).getBytes());
+ database.delete(UTF8.getBytes(normalize(key)));
} catch (final IOException e) { }
}
@@ -116,7 +116,7 @@ public class BlogBoardComments {
public String write(final CommentEntry page) {
// writes a new page and returns key
try {
- database.insert(page.key.getBytes(), page.record);
+ database.insert(UTF8.getBytes(page.key), page.record);
return page.key;
} catch (final Exception e) {
Log.logException(e);
@@ -133,7 +133,7 @@ public class BlogBoardComments {
copyOfKey = copyOfKey.substring(0, Math.min(copyOfKey.length(), KEY_LENGTH));
Map<String, String> record;
try {
- record = base.get(copyOfKey.getBytes());
+ record = base.get(UTF8.getBytes(copyOfKey));
} catch (final IOException e) {
Log.logException(e);
record = null;
@@ -142,7 +142,7 @@ public class BlogBoardComments {
record = null;
}
return (record == null) ?
- newEntry(copyOfKey, new byte[0], "anonymous".getBytes(), "127.0.0.1", new Date(), new byte[0]) :
+ newEntry(copyOfKey, new byte[0], UTF8.getBytes("anonymous"), "127.0.0.1", new Date(), new byte[0]) :
new CommentEntry(copyOfKey, record);
}
@@ -151,7 +151,7 @@ public class BlogBoardComments {
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
final DocumentBuilder builder = factory.newDocumentBuilder();
- final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes()));
+ final Document doc = builder.parse(new ByteArrayInputStream(UTF8.getBytes(input)));
ret = parseXMLimport(doc);
}
catch (final ParserConfigurationException e) {}
@@ -219,7 +219,7 @@ public class BlogBoardComments {
public void delete(final String key) {
try {
- database.delete(normalize(key).getBytes());
+ database.delete(UTF8.getBytes(normalize(key)));
}
catch (final IOException e) { }
}
@@ -39,6 +39,7 @@ import java.util.Map;
import de.anomic.data.BookmarksDB.Bookmark;
+ import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
@@ -60,7 +61,7 @@ public class BookmarkDate {
public Entry getDate(final String date) {
Map<String, String> map;
try {
- map = datesTable.get(date.getBytes());
+ map = datesTable.get(UTF8.getBytes(date));
} catch (final IOException e) {
map = null;
} catch (RowSpaceExceededException e) {
@@ -146,13 +147,13 @@ public class BookmarkDate {
public void setDatesTable() {
if (this.size() >0) {
try {
- datesTable.insert(getDateString().getBytes(), mem);
+ datesTable.insert(UTF8.getBytes(getDateString()), mem);
} catch (Exception e) {
Log.logException(e);
}
} else {
try {
- datesTable.delete(getDateString().getBytes());
+ datesTable.delete(UTF8.getBytes(getDateString()));
} catch (IOException e) {
Log.logException(e);
}
@@ -132,7 +132,7 @@ public class BookmarksDB {
// adding a bookmark to the bookmarksDB
public void saveBookmark(final Bookmark bookmark){
try {
- bookmarks.insert(bookmark.getUrlHash().getBytes(), bookmark.entry);
+ bookmarks.insert(UTF8.getBytes(bookmark.getUrlHash()), bookmark.entry);
} catch (final Exception e) {
Log.logException(e);
}
@@ -145,7 +145,7 @@ public class BookmarksDB {
public Bookmark getBookmark(final String urlHash){
try {
- final Map<String, String> map = bookmarks.get(urlHash.getBytes());
+ final Map<String, String> map = bookmarks.get(UTF8.getBytes(urlHash));
return (map == null) ? null : new Bookmark(map);
} catch (final IOException e) {
Log.logException(e);
@@ -172,7 +172,7 @@ public class BookmarksDB {
Bookmark b;
try {
b = getBookmark(urlHash);
- bookmarks.delete(urlHash.getBytes());
+ bookmarks.delete(UTF8.getBytes(urlHash));
} catch (final IOException e) {
b = null;
}
@@ -184,7 +184,7 @@ public class MessageBoard {
public String write(final entry message) {
// writes a message and returns key
try {
- database.insert(message.key.getBytes(), message.record);
+ database.insert(UTF8.getBytes(message.key), message.record);
return message.key;
} catch (final Exception e) {
Log.logException(e);
@@ -195,7 +195,7 @@ public class MessageBoard {
public entry read(final String key) {
Map<String, String> record;
try {
- record = database.get(key.getBytes());
+ record = database.get(UTF8.getBytes(key));
} catch (final IOException e) {
Log.logException(e);
return null;
@@ -208,7 +208,7 @@ public class MessageBoard {
public void remove(final String key) {
try {
- database.delete(key.getBytes());
+ database.delete(UTF8.getBytes(key));
} catch (final IOException e) {
}
}
@@ -50,6 +50,7 @@ import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
+ import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReferenceRow;
@@ -240,9 +241,9 @@ public class URLAnalysis {
if (gz) os = new GZIPOutputStream(os);
count = 0;
for (final Map.Entry<String, Integer> e: results.entrySet()) {
- os.write(e.getKey().getBytes());
+ os.write(UTF8.getBytes(e.getKey()));
os.write(new byte[]{'\t'});
- os.write(Integer.toString(e.getValue()).getBytes());
+ os.write(UTF8.getBytes(Integer.toString(e.getValue())));
os.write(new byte[]{'\n'});
count++;
if (System.currentTimeMillis() - time > 10000) {
@@ -330,7 +331,7 @@ public class URLAnalysis {
if (gz) os = new GZIPOutputStream(os);
int count = 0;
for (final String h: set) {
- os.write(h.getBytes());
+ os.write(UTF8.getBytes(h));
os.write(new byte[]{'\n'});
count++;
if (System.currentTimeMillis() - time > 10000) {
@@ -88,7 +88,7 @@ public final class UserDB {
public void removeEntry(final String hostName) {
try {
- userTable.delete(hostName.toLowerCase().getBytes());
+ userTable.delete(UTF8.getBytes(hostName.toLowerCase()));
} catch (final IOException e) {
Log.logException(e);
}
@@ -100,7 +100,7 @@ public final class UserDB {
}
Map<String, String> record = null;
try {
- record = userTable.get(userName.getBytes());
+ record = userTable.get(UTF8.getBytes(userName));
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
@@ -117,7 +117,7 @@ public final class UserDB {
public String addEntry(final Entry entry) {
try {
- userTable.insert(entry.userName.getBytes(), entry.mem);
+ userTable.insert(UTF8.getBytes(entry.userName), entry.mem);
return entry.userName;
} catch (final Exception e) {
Log.logException(e);
@@ -522,7 +522,7 @@ public final class UserDB {
}
try {
- UserDB.this.userTable.insert(getUserName().getBytes(), this.mem);
+ UserDB.this.userTable.insert(UTF8.getBytes(getUserName()), this.mem);
} catch(final Exception e){
Log.logException(e);
}
@@ -539,7 +539,7 @@ public final class UserDB {
public void setProperty(final String propName, final String newValue) throws IOException, RowSpaceExceededException {
this.mem.put(propName, newValue);
- UserDB.this.userTable.insert(getUserName().getBytes(), this.mem);
+ UserDB.this.userTable.insert(UTF8.getBytes(getUserName()), this.mem);
}
public String getProperty(final String propName, final String defaultValue) {
@@ -550,62 +550,6 @@ public final class UserDB {
return (this.mem.containsKey(accessRight.toString())) ? this.mem.get(accessRight.toString()).equals("true") : false;
}
- /**
- * @deprecated use hasRight(UPLOAD_RIGHT) instead
- */
- @Deprecated
- public boolean hasUploadRight() {
- return this.hasRight(AccessRight.UPLOAD_RIGHT);
- }
- /**
- * @deprecated use hasRight(DOWNLOAD_RIGHT) instead
- */
- @Deprecated
- public boolean hasDownloadRight() {
- return this.hasRight(AccessRight.DOWNLOAD_RIGHT);
- }
- /**
- * @deprecated use hasRight(PROXY_RIGHT) instead
- */
- @Deprecated
- public boolean hasProxyRight() {
- return this.hasRight(AccessRight.PROXY_RIGHT);
- }
- /**
- * @deprecated use hasRight(ADMIN_RIGHT) instead
- */
- @Deprecated
- public boolean hasAdminRight() {
- return this.hasRight(AccessRight.ADMIN_RIGHT);
- }
- /**
- * @deprecated use hasRight(BLOG_RIGHT) instead
- */
- @Deprecated
- public boolean hasBlogRight() {
- return this.hasRight(AccessRight.BLOG_RIGHT);
- }
- /**
- * @deprecated use hasRight(WIKIADMIN_RIGHT) instead
- */
- @Deprecated
- public boolean hasWikiAdminRight() {
- return this.hasRight(AccessRight.WIKIADMIN_RIGHT);
- }
- /**
- * @deprecated use hasRight(BOOKMARK_RIGHT) instead
- */
- @Deprecated
- public boolean hasBookmarkRight() {
- return this.hasRight(AccessRight.BOOKMARK_RIGHT);
- }
public boolean isLoggedOut(){
return (this.mem.containsKey(LOGGED_OUT) ? this.mem.get(LOGGED_OUT).equals("true") : false);
}
@@ -99,7 +99,7 @@ public class WorkTables extends Tables {
public byte[] recordAPICall(final serverObjects post, final String servletName, final String type, final String comment) {
// remove the apicall attributes from the post object
String pks = post.remove(TABLE_API_COL_APICALL_PK);
- byte[] pk = pks == null ? null : pks.getBytes();
+ byte[] pk = pks == null ? null : UTF8.getBytes(pks);
// generate the apicall url - without the apicall attributes
final String apiurl = /*"http://localhost:" + getConfig("port", "8090") +*/ "/" + servletName + "?" + post.toString();
@@ -119,12 +119,12 @@ public class WorkTables extends Tables {
if (row == null) {
// create and insert new entry
Data data = new Data();
- data.put(TABLE_API_COL_TYPE, type.getBytes());
- data.put(TABLE_API_COL_COMMENT, comment.getBytes());
- byte[] date = GenericFormatter.SHORT_MILSEC_FORMATTER.format().getBytes();
+ data.put(TABLE_API_COL_TYPE, UTF8.getBytes(type));
+ data.put(TABLE_API_COL_COMMENT, UTF8.getBytes(comment));
+ byte[] date = UTF8.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format());
data.put(TABLE_API_COL_DATE_RECORDING, date);
data.put(TABLE_API_COL_DATE_LAST_EXEC, date);
- data.put(TABLE_API_COL_URL, apiurl.getBytes());
+ data.put(TABLE_API_COL_URL, UTF8.getBytes(apiurl));
// insert APICALL attributes
data.put(TABLE_API_COL_APICALL_COUNT, "1");
@@ -133,7 +133,7 @@ public class WorkTables extends Tables {
// modify and update existing entry
// modify date attributes and patch old values
- row.put(TABLE_API_COL_DATE_LAST_EXEC, GenericFormatter.SHORT_MILSEC_FORMATTER.format().getBytes());
+ row.put(TABLE_API_COL_DATE_LAST_EXEC, UTF8.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format()));
if (!row.containsKey(TABLE_API_COL_DATE_RECORDING)) row.put(TABLE_API_COL_DATE_RECORDING, row.get(TABLE_API_COL_DATE));
row.remove(TABLE_API_COL_DATE);
@@ -180,17 +180,17 @@ public class WorkTables extends Tables {
try {
// create and insert new entry
Data data = new Data();
- data.put(TABLE_API_COL_TYPE, type.getBytes());
- data.put(TABLE_API_COL_COMMENT, comment.getBytes());
- byte[] date = GenericFormatter.SHORT_MILSEC_FORMATTER.format().getBytes();
+ data.put(TABLE_API_COL_TYPE, UTF8.getBytes(type));
+ data.put(TABLE_API_COL_COMMENT, UTF8.getBytes(comment));
+ byte[] date = UTF8.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format());
data.put(TABLE_API_COL_DATE_RECORDING, date);
data.put(TABLE_API_COL_DATE_LAST_EXEC, date);
- data.put(TABLE_API_COL_URL, apiurl.getBytes());
+ data.put(TABLE_API_COL_URL, UTF8.getBytes(apiurl));
// insert APICALL attributes
- data.put(TABLE_API_COL_APICALL_COUNT, "1".getBytes());
- data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.toString(time).getBytes());
- data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes());
+ data.put(TABLE_API_COL_APICALL_COUNT, UTF8.getBytes("1"));
+ data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, UTF8.getBytes(Integer.toString(time)));
+ data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, UTF8.getBytes(unit));
calculateAPIScheduler(data, false); // set next execution time
pk = super.insert(TABLE_API_NAME, data);
} catch (IOException e) {
@@ -220,7 +220,7 @@ public class WorkTables extends Tables {
for (String pk: pks) {
Tables.Row row = null;
try {
- row = select(WorkTables.TABLE_API_NAME, pk.getBytes());
+ row = select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
@@ -307,11 +307,11 @@ public class WorkTables extends Tables {
try {
// create and insert new entry
Data data = new Data();
- byte[] date = GenericFormatter.SHORT_MILSEC_FORMATTER.format().getBytes();
+ byte[] date = UTF8.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format());
data.put(TABLE_SEARCH_FAILURE_COL_URL, url.toNormalform(true, false));
data.put(TABLE_SEARCH_FAILURE_COL_DATE, date);
data.put(TABLE_SEARCH_FAILURE_COL_WORDS, queryHashes.export());
- data.put(TABLE_SEARCH_FAILURE_COL_COMMENT, reason.getBytes());
+ data.put(TABLE_SEARCH_FAILURE_COL_COMMENT, UTF8.getBytes(reason));
super.insert(TABLE_SEARCH_FAILURE_NAME, url.hash(), data);
} catch (IOException e) {
Log.logException(e);
@@ -321,7 +321,7 @@ public class WikiBoard {
* @param subject subject of child of current Entry.
*/
void setChild(final String subject) {
- record.put("child", Base64Order.enhancedCoder.encode(subject.getBytes()));
+ record.put("child", Base64Order.enhancedCoder.encode(UTF8.getBytes(subject)));
}
/**
@@ -369,9 +369,9 @@ public class WikiBoard {
entry.setAncestorDate(oldDate);
oldEntry.setChild(entry.subject());
// write the backup
- bkpbase.insert((entry.key + dateString(oldDate)).getBytes(), oldEntry.record);
+ bkpbase.insert(UTF8.getBytes(entry.key + dateString(oldDate)), oldEntry.record);
// write the new page
- datbase.insert(entry.key.getBytes(), entry.record);
+ datbase.insert(UTF8.getBytes(entry.key), entry.record);
key = entry.key;
} catch (final Exception e) {
Log.logException(e);
@@ -401,8 +401,8 @@ public class WikiBoard {
if (copyOfKey.length() > keyLength) {
copyOfKey = copyOfKey.substring(0, keyLength);
}
- final Map<String, String> record = base.get(copyOfKey.getBytes());
- ret = (record == null) ? newEntry(copyOfKey, ANONYMOUS, "127.0.0.1", "New Page", "".getBytes()) : new Entry(copyOfKey, record);
+ final Map<String, String> record = base.get(UTF8.getBytes(copyOfKey));
+ ret = (record == null) ? newEntry(copyOfKey, ANONYMOUS, "127.0.0.1", "New Page", UTF8.getBytes("")) : new Entry(copyOfKey, record);
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
@@ -1325,17 +1325,19 @@ public final class HTTPDFileHandler {
sbuffer = sbuffer.replaceAll("(href|src)='([^:\"]+)'", "$1='/proxy.html?url=http://"+proxyurl.getHost()+directory+"/$2'");
sbuffer = sbuffer.replaceAll("url\\(", "url(/proxy.html?url=http://"+proxyurl.getHost()+proxyurl.getPath());
+ byte[] sbb = UTF8.getBytes(sbuffer);
if (outgoingHeader.containsKey(HeaderFramework.TRANSFER_ENCODING)) {
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader);
out = new ChunkedOutputStream(out);
} else {
- outgoingHeader.put(HeaderFramework.CONTENT_LENGTH, Integer.toString(sbuffer.getBytes().length));
+ outgoingHeader.put(HeaderFramework.CONTENT_LENGTH, Integer.toString(sbb.length));
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader);
}
- out.write(UTF8.getBytes(sbuffer));
+ out.write(sbb);
} else {
if (!outgoingHeader.containsKey(HeaderFramework.CONTENT_LENGTH))
outgoingHeader.put(HeaderFramework.CONTENT_LENGTH, prop.getProperty(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE));
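This hunk does slightly more than swap the encoder: the rewritten page is now encoded exactly once into sbb, and the same array feeds both the Content-Length header and the out.write() call. In the old code the header length came from sbuffer.getBytes() (platform default charset) while the body was written with UTF8.getBytes(sbuffer), so the two could disagree for non-ASCII content. A sketch of the general pattern, with hypothetical names, not taken from the YaCy sources:

import java.io.IOException;
import java.io.OutputStream;
import java.util.Map;

// Sketch only (not YaCy source): encode the body once and reuse the byte[]
// for both the Content-Length header and the write, so the advertised
// length always matches the bytes actually sent.
final class EncodeOnce {
    static void send(final Map<String, String> headers, final OutputStream out, final String pageText) throws IOException {
        final byte[] body = UTF8.getBytes(pageText);                   // single UTF-8 encoding pass
        headers.put("Content-Length", Integer.toString(body.length));  // length of exactly these bytes
        out.write(body);                                               // no second getBytes() call
    }
}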
@@ -410,7 +410,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDFileHandler.doGet(prop, header, session.out);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted, 1)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this server, because you are using a non-granted IP (" + session.userAddress.getHostAddress() + "). allowed are only connections that match with the following filter: " + switchboard.getConfig("serverClient", "*") + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted, 1)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this server, because you are using a non-granted IP (" + session.userAddress.getHostAddress() + "). allowed are only connections that match with the following filter: " + switchboard.getConfig("serverClient", "*") + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
} else {
@@ -420,7 +420,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDProxyHandler.doGet(prop, header, session.out);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted, 2)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using a non-granted IP (" + session.userAddress.getHostAddress() + "). allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted, 2)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using a non-granted IP (" + session.userAddress.getHostAddress() + "). allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
}
@@ -477,8 +477,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDFileHandler.doHead(prop, header, session.out);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted)" +
- serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
} else {
@@ -488,8 +487,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDProxyHandler.doHead(prop, header, session.out);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted)" +
- serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
}
@@ -523,7 +521,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
// "A server which receives an entity-body with a transfer-coding it does
// not understand SHOULD return 501 (Unimplemented), and close the
// connection." [RFC 2616, section 3.6]
- session.out.write((httpVersion + " 501 transfer-encoding not implemented" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you send a transfer-encoding to this server, which is not supported: " + transferEncoding + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 501 transfer-encoding not implemented" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you send a transfer-encoding to this server, which is not supported: " + transferEncoding + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
} else {
@@ -543,7 +541,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDFileHandler.doPost(prop, header, session.out, sessionIn);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this server, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("serverClient", "*") + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this server, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("serverClient", "*") + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
} else {
@@ -553,7 +551,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDProxyHandler.doPost(prop, header, session.out, sessionIn);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
}
@@ -619,15 +617,15 @@ public final class HTTPDemon implements serverHandler, Cloneable {
if (!(allowProxy(session))) {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
if (port != 443 && switchboard.getConfig("secureHttps", "true").equals("true")) {
// security: connection only to ssl port
// we send a 403 (forbidden) error back
- session.out.write((httpVersion + " 403 Connection to non-443 forbidden" +
- serverCore.CRLF_STRING + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 Connection to non-443 forbidden" +
+ serverCore.CRLF_STRING + serverCore.CRLF_STRING));
return serverCore.TERMINATE_CONNECTION;
}
@@ -637,7 +635,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
HTTPDProxyHandler.doConnect(prop, header, session.in, session.out);
} else {
// not authorized through firewall blocking (ip does not match filter)
- session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING).getBytes());
+ session.out.write(UTF8.getBytes(httpVersion + " 403 refused (IP not granted)" + serverCore.CRLF_STRING + serverCore.CRLF_STRING + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + session.userAddress.getHostAddress() + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.CRLF_STRING));
}
return serverCore.TERMINATE_CONNECTION;
@@ -401,9 +401,9 @@ public final class TemplateEngine {
if (br != null) try { br.close(); br=null; } catch (final Exception e) {}
}
final PushbackInputStream pis2 = new PushbackInputStream(new ByteArrayInputStream(include.getBytes()));
- structure.append("<fileinclude file=\"".getBytes()).append(filename).append(">\n".getBytes());
+ structure.append(UTF8.getBytes("<fileinclude file=\"")).append(filename).append(UTF8.getBytes(">\n"));
structure.append(writeTemplate(pis2, out, pattern, dflt, prefix));
- structure.append("</fileinclude>\n".getBytes());
+ structure.append(UTF8.getBytes("</fileinclude>\n"));
}
}
@@ -450,7 +450,7 @@ public final class TemplateEngine {
private final static byte[] newPrefix(final byte[] oldPrefix, final byte[] multi_key, final int i) {
final ByteBuffer newPrefix = new ByteBuffer(oldPrefix.length + multi_key.length + 8);
- newPrefix.append(oldPrefix).append(multi_key).append(ul).append(Integer.toString(i).getBytes()).append(ul);
+ newPrefix.append(oldPrefix).append(multi_key).append(ul).append(UTF8.getBytes(Integer.toString(i))).append(ul);
try {
newPrefix.close();
} catch (IOException e) {
@@ -213,7 +213,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
public kiter(final boolean up, final String firstHash) throws IOException {
this.up = up;
- this.iter = urlIndexFile.rows(up, (firstHash == null) ? null : firstHash.getBytes());
+ this.iter = urlIndexFile.rows(up, (firstHash == null) ? null : UTF8.getBytes(firstHash));
this.error = false;
}
@@ -274,7 +274,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
final Iterator<String> eiter2 = damagedURLS.iterator();
byte[] urlHashBytes;
while (eiter2.hasNext()) {
- urlHashBytes = eiter2.next().getBytes();
+ urlHashBytes = UTF8.getBytes(eiter2.next());
// trying to fix the invalid URL
String oldUrlStr = null;
@@ -293,7 +293,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
if (client.HEADResponse(newUrl.toString()) != null
&& client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
- entry.setCol(1, newUrl.toString().getBytes());
+ entry.setCol(1, UTF8.getBytes(newUrl.toString()));
urlIndexFile.put(entry);
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + UTF8.String(urlHashBytes) + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
} else {
@@ -585,7 +585,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
TreeSet<String> set = new TreeSet<String>();
for (hashStat hs: map.values()) {
if (hs == null) continue;
- urlref = this.load(hs.urlhash.getBytes(), null, 0);
+ urlref = this.load(UTF8.getBytes(hs.urlhash), null, 0);
if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
set.add(urlref.metadata().url().getHost());
count--;
@@ -619,7 +619,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
while (j.hasNext()) {
urlhash = j.next();
if (urlhash == null) continue;
- urlref = this.load(urlhash.getBytes(), null, 0);
+ urlref = this.load(UTF8.getBytes(urlhash), null, 0);
if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
if (statsDump == null) return new ArrayList<hostStat>().iterator(); // some other operation has destroyed the object
comps = urlref.metadata();
@@ -675,7 +675,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
// then delete the urls using this list
int cnt = 0;
for (String h: l) {
- if (urlIndexFile.delete(h.getBytes())) cnt++;
+ if (urlIndexFile.delete(UTF8.getBytes(h))) cnt++;
}
// finally remove the line with statistics
@ -107,19 +107,20 @@ public final class QueryParams {
|
|||
public final String userAgent;
|
||||
public boolean filterfailurls;
|
||||
|
||||
public QueryParams(final String queryString,
|
||||
public QueryParams(
|
||||
final String queryString,
|
||||
final int itemsPerPage,
|
||||
final Bitfield constraint,
|
||||
final Segment indexSegment,
|
||||
final RankingProfile ranking,
|
||||
final String userAgent) {
|
||||
|
||||
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
|
||||
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(UTF8.getBytes(queryString)))) {
|
||||
this.queryString = null;
|
||||
this.queryHashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
this.excludeHashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
try {
|
||||
this.queryHashes.put(queryString.getBytes());
|
||||
this.queryHashes.put(UTF8.getBytes(queryString));
|
||||
} catch (RowSpaceExceededException e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
|
@ -255,7 +256,7 @@ public final class QueryParams {
|
|||
final HandleSet keyhashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
if (query != null) {
|
||||
for (int i = 0; i < (query.length() / Word.commonHashLength); i++) try {
|
||||
keyhashes.put(query.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength).getBytes());
|
||||
keyhashes.put(UTF8.getBytes(query.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength)));
|
||||
} catch (RowSpaceExceededException e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
|
@ -267,7 +268,7 @@ public final class QueryParams {
|
|||
final HandleSet keyhashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
if (query != null) {
|
||||
for (int i = 0; i < (query.length() / Word.commonHashLength); i++) try {
|
||||
keyhashes.put(query.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength).getBytes());
|
||||
keyhashes.put(UTF8.getBytes(query.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength)));
|
||||
} catch (RowSpaceExceededException e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
|
|

@@ -587,7 +587,7 @@ public final class RankingProcess extends Thread {
 domhash = domhashs.next();
 if (domhash == null) continue;
 urlhash = this.hostResolver.get(domhash);
-row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash.getBytes(), null, 0);
+row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(UTF8.getBytes(urlhash), null, 0);
 hostname = row == null ? null : row.metadata().url().getHost();
 if (hostname != null) {
 result.set(hostname, this.hostNavigator.get(domhash));

@@ -1045,7 +1045,7 @@ public final class Switchboard extends serverSwitch {
 return network.indexOf(peer) >= 0;
 } else if (clustermode.equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER)) {
 // check if we got the request from a peer in the public cluster
-return this.clusterhashes.containsKey(peer.getBytes());
+return this.clusterhashes.containsKey(UTF8.getBytes(peer));
 } else {
 return false;
 }

@@ -1063,7 +1063,7 @@ public final class Switchboard extends serverSwitch {
 return network.indexOf(seed.getPublicAddress()) >= 0;
 } else if (clustermode.equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER)) {
 // check if we got the request from a peer in the public cluster
-return this.clusterhashes.containsKey(seed.hash.getBytes());
+return this.clusterhashes.containsKey(UTF8.getBytes(seed.hash));
 } else {
 return false;
 }

@@ -1363,7 +1363,7 @@ public final class Switchboard extends serverSwitch {
 // create a queue entry
 Document document = surrogate.document();
 Request request = new Request(
-peers.mySeed().hash.getBytes(),
+UTF8.getBytes(peers.mySeed().hash),
 surrogate.getIdentifier(true),
 null,
 "",

@@ -1525,7 +1525,7 @@ public final class Switchboard extends serverSwitch {
 Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE)));
 insert = true;
 }
-if (insert) crawler.putActive(selentry.handle().getBytes(), selentry);
+if (insert) crawler.putActive(UTF8.getBytes(selentry.handle()), selentry);
 }
 } catch (final Exception e) {
 Log.logException(e);

@@ -1550,7 +1550,7 @@ public final class Switchboard extends serverSwitch {
 Log.logException(e);
 }
 for (final String pk: pks) try {
-row = this.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
+row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
 WorkTables.calculateAPIScheduler(row, true); // calculate next update time
 this.tables.update(WorkTables.TABLE_API_NAME, row);
 } catch (IOException e) {

@@ -1976,7 +1976,7 @@ public final class Switchboard extends serverSwitch {
 condenser,
 searchEvent,
 sourceName);
-yacyChannel.channels(Base64Order.enhancedCoder.equal(queueEntry.initiator(), peers.mySeed().hash.getBytes()) ? yacyChannel.LOCALINDEXING : yacyChannel.REMOTEINDEXING).addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url().toNormalform(true, false)));
+yacyChannel.channels(Base64Order.enhancedCoder.equal(queueEntry.initiator(), UTF8.getBytes(peers.mySeed().hash)) ? yacyChannel.LOCALINDEXING : yacyChannel.REMOTEINDEXING).addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url().toNormalform(true, false)));
 } catch (final IOException e) {
 //if (this.log.isFine()) log.logFine("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase);
 addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, "error storing url: " + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase + ", error = " + e.getMessage());

@@ -1987,8 +1987,8 @@ public final class Switchboard extends serverSwitch {
 for (final Map.Entry<MultiProtocolURI, String> rssEntry : document.getRSS().entrySet()) {
 final Tables.Data rssRow = new Tables.Data();
 rssRow.put("referrer", queueEntry.url().hash());
-rssRow.put("url", rssEntry.getKey().toNormalform(true, false).getBytes());
-rssRow.put("title", rssEntry.getValue().getBytes());
+rssRow.put("url", UTF8.getBytes(rssEntry.getKey().toNormalform(true, false)));
+rssRow.put("title", UTF8.getBytes(rssEntry.getValue()));
 rssRow.put("recording_date", new Date());
 try {
 this.tables.update("rss", new DigestURI(rssEntry.getKey()).hash(), rssRow);

@@ -2001,7 +2001,7 @@ public final class Switchboard extends serverSwitch {
 ResultURLs.stack(
 newEntry, // loaded url db entry
 queueEntry.initiator(), // initiator peer hash
-this.peers.mySeed().hash.getBytes(), // executor peer hash
+UTF8.getBytes(this.peers.mySeed().hash), // executor peer hash
 processCase // process case
 );

@@ -2079,7 +2079,7 @@ public final class Switchboard extends serverSwitch {
 }
 if (indexSegments.segment(process).urlMetadata.exists(url.hash())) return; // don't do double-work
 final Request request = loader.request(url, true, true);
-final CrawlProfile profile = sb.crawler.getActive(request.profileHandle().getBytes());
+final CrawlProfile profile = sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
 String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
 if (acceptedError != null) {
 log.logWarning("addToIndex: cannot load " + url.toNormalform(false, false) + ": " + acceptedError);

@@ -351,7 +351,6 @@ public final class SwitchboardConstants {
 * whole database of known RWIs and URLs as well as dumps of the DHT-In and DHT-Out caches are stored</p>
 */
 public static final String INDEX_PRIMARY_PATH = "indexPrimaryPath"; // this is a relative path to the data root
-public static final String INDEX_SECONDARY_PATH = "indexSecondaryPath"; // this is a absolute path to any location
 public static final String INDEX_PATH_DEFAULT = "DATA/INDEX";
 /**
 * <p><code>public static final String <strong>LISTS_PATH</strong> = "listsPath"</code></p>

@@ -261,10 +261,10 @@ public class cryptbig {
 final String A = UTF8.String(ecipher.doFinal(X.getBytes("UTF8")));
 final String B = UTF8.String(ecipher.doFinal(Base64Order.standardCoder.encodeLongSB(A.length(), 2).toString().getBytes("UTF8"))); // most probable not longer than 4
 final String C = Base64Order.standardCoder.encodeLongSB(B.length(), 1).toString(); // fixed length 1 (6 bits, that should be enough)
-fout.write(magicString.getBytes()); // the magic string, used to identify a 'crypt'-file
-fout.write(C.getBytes());
-fout.write(B.getBytes());
-fout.write(A.getBytes());
+fout.write(UTF8.getBytes(magicString)); // the magic string, used to identify a 'crypt'-file
+fout.write(UTF8.getBytes(C));
+fout.write(UTF8.getBytes(B));
+fout.write(UTF8.getBytes(A));

 // write content of file
 copy(fout, fin, 512);

@@ -191,7 +191,7 @@ public class PeerSelection {
 }

 public static byte[] selectTransferStart() {
-return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + Word.commonHashLength).getBytes();
+return UTF8.getBytes(Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + Word.commonHashLength));
 }

 public static byte[] limitOver(final yacySeedDB seedDB, final byte[] startHash) {

@@ -30,6 +30,7 @@ package de.anomic.yacy;

 import java.io.File;

+import net.yacy.cora.document.UTF8;
 import net.yacy.kelondro.util.FileUtils;

 import de.anomic.search.Switchboard;

@@ -47,7 +48,7 @@ public class yacyAccessible {
 final Switchboard sb = Switchboard.getSwitchboard();
 final File shortcut = new File(sb.getAppPath() + "/addon/YaCy-Search.html".replace("/", File.separator));
 final String content = "<meta http-equiv=\"refresh\" content=\"0;url=http://localhost:" + newPort + "/\">";
-FileUtils.copy(content.getBytes(), shortcut);
+FileUtils.copy(UTF8.getBytes(content), shortcut);
 } catch (final Exception e) {
 return;
 }

@@ -62,7 +63,7 @@ public class yacyAccessible {
 final Switchboard sb = Switchboard.getSwitchboard();
 final File shortcut = new File(sb.getAppPath() + "/addon/YaCy-Search.bat".replace("/", File.separator));
 final String content = "rundll32 url.dll,FileProtocolHandler \"http://localhost:" + newPort + "\"";
-FileUtils.copy(content.getBytes(), shortcut);
+FileUtils.copy(UTF8.getBytes(content), shortcut);
 } catch (final Exception e) {
 return;
 }

@@ -437,7 +437,7 @@ public final class yacyClient {
 final ReferenceContainer<WordReference>[] container = new ReferenceContainer[words];
 for (int i = 0; i < words; i++) {
 try {
-container[i] = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhashes.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength).getBytes(), count);
+container[i] = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, UTF8.getBytes(wordhashes.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength)), count);
 } catch (RowSpaceExceededException e) {
 Log.logException(e);
 return -1;

@@ -479,7 +479,7 @@ public final class yacyClient {
 // passed all checks, store url
 try {
 indexSegment.urlMetadata().store(urlEntry);
-ResultURLs.stack(urlEntry, mySeed.hash.getBytes(), target.hash.getBytes(), EventOrigin.QUERIES);
+ResultURLs.stack(urlEntry, mySeed.hash.getBytes(), UTF8.getBytes(target.hash), EventOrigin.QUERIES);
 } catch (final IOException e) {
 yacyCore.log.logWarning("could not store search result", e);
 continue; // db-error

@@ -672,10 +672,10 @@ public final class yacyClient {
 indexabstract = new TreeMap<byte[], String>(Base64Order.enhancedCoder);
 for (Map.Entry<String, String> entry: resultMap.entrySet()) {
 if (entry.getKey().startsWith("indexcount.")) {
-indexcount.put(entry.getKey().substring(11).getBytes(), Integer.parseInt(entry.getValue()));
+indexcount.put(UTF8.getBytes(entry.getKey().substring(11)), Integer.parseInt(entry.getValue()));
 }
 if (entry.getKey().startsWith("indexabstract.")) {
-indexabstract.put(entry.getKey().substring(14).getBytes(), entry.getValue());
+indexabstract.put(UTF8.getBytes(entry.getKey().substring(14)), entry.getValue());
 }
 }
 references = resultMap.get("references").split(",");

@@ -862,7 +862,7 @@ public final class yacyClient {
 // extract the urlCache from the result
 final URIMetadataRow[] urls = new URIMetadataRow[uhs.length];
 for (int i = 0; i < uhs.length; i++) {
-urls[i] = urlCache.get(uhs[i].getBytes());
+urls[i] = urlCache.get(UTF8.getBytes(uhs[i]));
 if (urls[i] == null) {
 if (yacyCore.log.isFine()) yacyCore.log.logFine("DEBUG transferIndex: requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
 }

@@ -1027,7 +1027,7 @@ public final class yacyClient {
 searchlines.add(args[2]);
 }
 for (final String line: searchlines) {
-final byte[] wordhashe = QueryParams.hashSet2hashString(Word.words2hashesHandles(QueryParams.cleanQuery(line)[0])).getBytes();
+final byte[] wordhashe = UTF8.getBytes(QueryParams.hashSet2hashString(Word.words2hashesHandles(QueryParams.cleanQuery(line)[0])));
 long time = System.currentTimeMillis();
 SearchResult result;
 try {

@@ -137,7 +137,7 @@ public class yacyNewsDB {
 }

 public void remove(final String id) throws IOException {
-news.delete(id.getBytes());
+news.delete(UTF8.getBytes(id));
 }

 public synchronized Record put(final Record record) throws IOException, RowSpaceExceededException {

@@ -151,7 +151,7 @@ public class yacyNewsDB {

 public synchronized Record get(final String id) throws IOException {
 try {
-return b2r(news.get(id.getBytes()));
+return b2r(news.get(UTF8.getBytes(id)));
 } catch (final kelondroException e) {
 resetDB();
 return null;

@@ -180,9 +180,9 @@ public class yacyNewsDB {
 attributes = new HashMap<String, String>().toString();
 }
 final Row.Entry entry = this.news.row().newEntry();
-entry.setCol(0, r.id().getBytes());
+entry.setCol(0, UTF8.getBytes(r.id()));
 entry.setCol(1, UTF8.getBytes(r.category()));
-entry.setCol(2, (r.received() == null) ? null : my_SHORT_SECOND_FORMATTER.format(r.received()).getBytes());
+entry.setCol(2, (r.received() == null) ? null : UTF8.getBytes(my_SHORT_SECOND_FORMATTER.format(r.received())));
 entry.setCol(3, Base64Order.enhancedCoder.encodeLongBA(r.distributed(), 2));
 entry.setCol(4, UTF8.getBytes(attributes));
 return entry;

@@ -50,6 +50,7 @@ import java.util.HashSet;
 import java.util.Iterator;

 import net.yacy.cora.date.GenericFormatter;
+import net.yacy.cora.document.UTF8;
 import net.yacy.kelondro.index.Column;
 import net.yacy.kelondro.index.Row;
 import net.yacy.kelondro.index.RowSpaceExceededException;

@@ -146,7 +147,7 @@ public class yacyNewsQueue {
 record = i.next();
 if ((record != null) && (record.id().equals(id))) {
 try {
-this.queueStack.remove(id.getBytes());
+this.queueStack.remove(UTF8.getBytes(id));
 } catch (IOException e) {
 Log.logException(e);
 }

@@ -167,8 +168,8 @@ public class yacyNewsQueue {
 if (r == null) return null;
 newsDB.put(r);
 final Row.Entry b = queueStack.row().newEntry(new byte[][]{
-r.id().getBytes(),
-GenericFormatter.SHORT_SECOND_FORMATTER.format().getBytes()});
+UTF8.getBytes(r.id()),
+UTF8.getBytes(GenericFormatter.SHORT_SECOND_FORMATTER.format())});
 return b;
 }

@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;

 import net.yacy.cora.document.RSSMessage;
+import net.yacy.cora.document.UTF8;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.MapTools;

@@ -205,7 +206,7 @@ public class yacyPeerActions {
 // we do this if we did not get contact with the other peer
 if (yacyCore.log.isFine()) yacyCore.log.logFine("connect: no contact to a " + peer.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN) + " peer '" + peer.getName() + "' at " + peer.getPublicAddress() + ". Cause: " + cause);
 synchronized (seedDB) {
-if (!seedDB.hasDisconnected(peer.hash.getBytes())) { disconnects++; }
+if (!seedDB.hasDisconnected(UTF8.getBytes(peer.hash))) { disconnects++; }
 peer.put("dct", Long.toString(System.currentTimeMillis()));
 seedDB.addDisconnected(peer); // update info
 }

@@ -220,7 +220,7 @@ public class yacySearch extends Thread {
 // prepare seed targets and threads
 final yacySeed targetPeer = peers.getConnected(targethash);
 if (targetPeer == null || targetPeer.hash == null) return null;
-if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(targetPeer.hash.getBytes()));
+if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(UTF8.getBytes(targetPeer.hash)));
 final yacySearch searchThread = new yacySearch(
 wordhashes, "", urlhashes, Pattern.compile(""), Pattern.compile(".*"), "", "", "", 20, time, 9999, true, 0, targetPeer,
 indexSegment, peers, containerCache, null, blacklist, rankingProfile, constraint);

@@ -615,13 +615,13 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yacySeed> {

 private boolean getFlag(final int flag) {
 final String flags = get(yacySeed.FLAGS, yacySeed.FLAGSZERO);
-return (new bitfield(flags.getBytes())).get(flag);
+return (new bitfield(UTF8.getBytes(flags))).get(flag);
 }

 private void setFlag(final int flag, final boolean value) {
 String flags = get(yacySeed.FLAGS, yacySeed.FLAGSZERO);
 if (flags.length() != 4) { flags = yacySeed.FLAGSZERO; }
-final bitfield f = new bitfield(flags.getBytes());
+final bitfield f = new bitfield(UTF8.getBytes(flags));
 f.set(flag, value);
 dna.put(yacySeed.FLAGS, UTF8.String(f.getBytes()));
 }

@@ -692,8 +692,8 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yacySeed> {
 if (interval == null) return randomHash();

 // find dht position and size of gap
-long left = FlatWordPartitionScheme.std.dhtPosition(interval.substring(0, 12).getBytes(), null);
-long right = FlatWordPartitionScheme.std.dhtPosition(interval.substring(12).getBytes(), null);
+long left = FlatWordPartitionScheme.std.dhtPosition(UTF8.getBytes(interval.substring(0, 12)), null);
+long right = FlatWordPartitionScheme.std.dhtPosition(UTF8.getBytes(interval.substring(12)), null);
 final long gap8 = FlatWordPartitionScheme.dhtDistance(left, right) >> 3; // 1/8 of a gap
 long gapx = gap8 + (Math.abs(random.nextLong()) % (6 * gap8));
 long gappos = (Long.MAX_VALUE - left >= gapx) ? left + gapx : (left - Long.MAX_VALUE) + gapx;

@@ -728,16 +728,16 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yacySeed> {
 continue;
 }
 l = FlatWordPartitionScheme.dhtDistance(
-FlatWordPartitionScheme.std.dhtPosition(s0.hash.getBytes(), null),
-FlatWordPartitionScheme.std.dhtPosition(s1.hash.getBytes(), null));
+FlatWordPartitionScheme.std.dhtPosition(UTF8.getBytes(s0.hash), null),
+FlatWordPartitionScheme.std.dhtPosition(UTF8.getBytes(s1.hash), null));
 gaps.put(l, s0.hash + s1.hash);
 s0 = s1;
 }
 // compute also the last gap
 if ((first != null) && (s0 != null)) {
 l = FlatWordPartitionScheme.dhtDistance(
-FlatWordPartitionScheme.std.dhtPosition(s0.hash.getBytes(), null),
-FlatWordPartitionScheme.std.dhtPosition(first.hash.getBytes(), null));
+FlatWordPartitionScheme.std.dhtPosition(UTF8.getBytes(s0.hash), null),
+FlatWordPartitionScheme.std.dhtPosition(UTF8.getBytes(first.hash), null));
 gaps.put(l, s0.hash + first.hash);
 }
 return gaps;

@@ -768,7 +768,7 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yacySeed> {
 final String hash =
 Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))).substring(0, 6) +
 Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))).substring(0, 6);
-return hash.getBytes();
+return UTF8.getBytes(hash);
 }

 public static yacySeed genRemoteSeed(final String seedStr, final String key, final boolean ownSeed) throws IOException {

@@ -41,6 +41,7 @@ import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;

 import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.UTF8;
 import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;

@@ -293,9 +294,9 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (seedActiveDB.isEmpty() && seedPassiveDB.isEmpty() && seedPotentialDB.isEmpty()) return; // avoid that the own seed is initialized too early
 if (this.mySeed == null) initMySeed();
 try {
-seedActiveDB.delete(mySeed.hash.getBytes());
-seedPassiveDB.delete(mySeed.hash.getBytes());
-seedPotentialDB.delete(mySeed.hash.getBytes());
+seedActiveDB.delete(UTF8.getBytes(mySeed.hash));
+seedPassiveDB.delete(UTF8.getBytes(mySeed.hash));
+seedPotentialDB.delete(UTF8.getBytes(mySeed.hash));
 } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
 }

@@ -401,7 +402,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (seed == null) {
 yacyCore.log.logWarning("cluster peer '" + yacydom + "' was not found.");
 } else {
-clustermap.put(hash.getBytes(), ipport);
+clustermap.put(UTF8.getBytes(hash), ipport);
 }
 } else if (yacydom.endsWith(".yacy")) {
 // find a peer with its name

@@ -409,7 +410,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (seed == null) {
 yacyCore.log.logWarning("cluster peer '" + yacydom + "' was not found.");
 } else {
-clustermap.put(seed.hash.getBytes(), ipport);
+clustermap.put(UTF8.getBytes(seed.hash), ipport);
 }
 } else {
 yacyCore.log.logWarning("cluster peer '" + addresses[i] + "' has wrong syntax. the name must end with .yacy or .yacyh");

@@ -494,10 +495,10 @@ public final class yacySeedDB implements AlternativeDomainNames {
 nameLookupCache.put(seed.getName(), seed.hash);
 final Map<String, String> seedPropMap = seed.getMap();
 synchronized (seedPropMap) {
-seedActiveDB.insert(seed.hash.getBytes(), seedPropMap);
+seedActiveDB.insert(UTF8.getBytes(seed.hash), seedPropMap);
 }
-seedPassiveDB.delete(seed.hash.getBytes());
-seedPotentialDB.delete(seed.hash.getBytes());
+seedPassiveDB.delete(UTF8.getBytes(seed.hash));
+seedPotentialDB.delete(UTF8.getBytes(seed.hash));
 } catch (final Exception e) {
 yacyCore.log.logSevere("ERROR add: seed.db corrupt (" + e.getMessage() + "); resetting seed.db", e);
 resetActiveTable();

@@ -508,14 +509,14 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (seed.isProper(false) != null) return;
 try {
 nameLookupCache.remove(seed.getName());
-seedActiveDB.delete(seed.hash.getBytes());
-seedPotentialDB.delete(seed.hash.getBytes());
+seedActiveDB.delete(UTF8.getBytes(seed.hash));
+seedPotentialDB.delete(UTF8.getBytes(seed.hash));
 } catch (final Exception e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
 //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
 try {
 final Map<String, String> seedPropMap = seed.getMap();
 synchronized (seedPropMap) {
-seedPassiveDB.insert(seed.hash.getBytes(), seedPropMap);
+seedPassiveDB.insert(UTF8.getBytes(seed.hash), seedPropMap);
 }
 } catch (final Exception e) {
 yacyCore.log.logSevere("ERROR add: seed.db corrupt (" + e.getMessage() + "); resetting seed.db", e);

@@ -527,14 +528,14 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (seed.isProper(false) != null) return;
 try {
 nameLookupCache.remove(seed.getName());
-seedActiveDB.delete(seed.hash.getBytes());
-seedPassiveDB.delete(seed.hash.getBytes());
+seedActiveDB.delete(UTF8.getBytes(seed.hash));
+seedPassiveDB.delete(UTF8.getBytes(seed.hash));
 } catch (final Exception e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
 //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
 try {
 final Map<String, String> seedPropMap = seed.getMap();
 synchronized (seedPropMap) {
-seedPotentialDB.insert(seed.hash.getBytes(), seedPropMap);
+seedPotentialDB.insert(UTF8.getBytes(seed.hash), seedPropMap);
 }
 } catch (final Exception e) {
 yacyCore.log.logSevere("ERROR add: seed.db corrupt (" + e.getMessage() + "); resetting seed.db", e);

@@ -545,14 +546,14 @@ public final class yacySeedDB implements AlternativeDomainNames {
 public synchronized void removeDisconnected(final String peerHash) {
 if(peerHash == null) return;
 try {
-seedPassiveDB.delete(peerHash.getBytes());
+seedPassiveDB.delete(UTF8.getBytes(peerHash));
 } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
 }

 public synchronized void removePotential(final String peerHash) {
 if(peerHash == null) return;
 try {
-seedPotentialDB.delete(peerHash.getBytes());
+seedPotentialDB.delete(UTF8.getBytes(peerHash));
 } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
 }

@@ -573,7 +574,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if ((this.mySeed != null) && (hash.equals(mySeed.hash))) return mySeed;
 ConcurrentHashMap<String, String> entry = new ConcurrentHashMap<String, String>();
 try {
-Map<String, String> map = database.get(hash.getBytes());
+Map<String, String> map = database.get(UTF8.getBytes(hash));
 if (map == null) return null;
 entry.putAll(map);
 } catch (final IOException e) {

@@ -613,13 +614,13 @@ public final class yacySeedDB implements AlternativeDomainNames {
 }

 yacySeed s = get(hash, seedActiveDB);
-if (s != null) try { seedActiveDB.insert(hash.getBytes(), seed.getMap()); return;} catch (final Exception e) {Log.logException(e);}
+if (s != null) try { seedActiveDB.insert(UTF8.getBytes(hash), seed.getMap()); return;} catch (final Exception e) {Log.logException(e);}

 s = get(hash, seedPassiveDB);
-if (s != null) try { seedPassiveDB.insert(hash.getBytes(), seed.getMap()); return;} catch (final Exception e) {Log.logException(e);}
+if (s != null) try { seedPassiveDB.insert(UTF8.getBytes(hash), seed.getMap()); return;} catch (final Exception e) {Log.logException(e);}

 s = get(hash, seedPotentialDB);
-if (s != null) try { seedPotentialDB.insert(hash.getBytes(), seed.getMap()); return;} catch (final Exception e) {Log.logException(e);}
+if (s != null) try { seedPotentialDB.insert(UTF8.getBytes(hash), seed.getMap()); return;} catch (final Exception e) {Log.logException(e);}
 }

 public yacySeed lookupByName(String peerName) {

@@ -700,7 +701,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (addressStr == null) {
 Log.logWarning("YACY","lookupByIP/Connected: address of seed " + seed.getName() + "/" + seed.hash + " is null.");
 try {
-badPeerHashes.put(seed.hash.getBytes());
+badPeerHashes.put(UTF8.getBytes(seed.hash));
 } catch (RowSpaceExceededException e1) {
 Log.logException(e1);
 break;

@@ -733,7 +734,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
 if (addressStr == null) {
 Log.logWarning("YACY","lookupByIPDisconnected: address of seed " + seed.getName() + "/" + seed.hash + " is null.");
 try {
-badPeerHashes.put(seed.hash.getBytes());
+badPeerHashes.put(UTF8.getBytes(seed.hash));
 } catch (RowSpaceExceededException e1) {
 Log.logException(e1);
 break;

@@ -71,6 +71,83 @@ public class UTF8 {
 return new String(bytes, offset, length, charset);
 }

+/**
+ * getBytes() as method for String synchronizes during the look-up for the
+ * Charset object for the default charset as given with a default charset name.
+ * This is the normal process:
+
+    public byte[] getBytes() {
+        return StringCoding.encode(value, offset, count);
+    }
+
+    static byte[] encode(char[] ca, int off, int len) {
+        String csn = Charset.defaultCharset().name();
+        try {
+            return encode(csn, ca, off, len);
+        ...
+
+    static byte[] encode(String charsetName, char[] ca, int off, int len)
+        throws UnsupportedEncodingException
+    {
+        StringEncoder se = (StringEncoder)deref(encoder);
+        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
+        if ((se == null) || !(csn.equals(se.requestedCharsetName())
+                              || csn.equals(se.charsetName()))) {
+            se = null;
+            try {
+                Charset cs = lookupCharset(csn);
+        ....
+
+    private static Charset lookupCharset(String csn) {
+        if (Charset.isSupported(csn)) {
+            try {
+                return Charset.forName(csn);
+        ....
+
+    public static Charset forName(String charsetName) {
+        Charset cs = lookup(charsetName);
+        ....
+
+    private static Charset lookup(String charsetName) {
+        if (charsetName == null)
+            throw new IllegalArgumentException("Null charset name");
+
+        Object[] a;
+        if ((a = cache1) != null && charsetName.equals(a[0]))
+            return (Charset)a[1];
+        // We expect most programs to use one Charset repeatedly.
+        // We convey a hint to this effect to the VM by putting the
+        // level 1 cache miss code in a separate method.
+        return lookup2(charsetName);
+    }
+
+    private static Charset lookup2(String charsetName) {
+        Object[] a;
+        if ((a = cache2) != null && charsetName.equals(a[0])) {
+            cache2 = cache1;
+            cache1 = a;
+            return (Charset)a[1];
+        }
+
+        Charset cs;
+        if ((cs = standardProvider.charsetForName(charsetName)) != null ||
+            (cs = lookupExtendedCharset(charsetName)) != null ||
+            (cs = lookupViaProviders(charsetName)) != null)
+        {
+            cache(charsetName, cs);
+        ....
+
+ At this point the getBytes() call synchronizes at one of the methods
+    standardProvider.charsetForName
+    lookupExtendedCharset
+    lookupViaProviders
+
+ * with our call using a given charset object, the call is much easier to perform
+ * and it omits the synchronization for the charset lookup.
+ *
+ * @param s
+ * @return
+ */
+public final static byte[] getBytes(final String s) {
+    if (s == null) return null;
+    return s.getBytes(charset);
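
The javadoc added above traces the call chain of the no-argument String.getBytes() down to the synchronized charset lookup. A rough way to observe the difference is to encode the same short string from several threads, once with the default lookup and once with a cached Charset. This sketch is not part of the patch; the thread count and iteration count are arbitrary, and absolute numbers vary widely between JVM versions:

import java.nio.charset.Charset;

public class GetBytesBench {

    private static final Charset UTF_8 = Charset.forName("UTF-8"); // cached once, like UTF8.charset

    public static void main(final String[] args) throws InterruptedException {
        final String s = "ABCDEFGHIJKL"; // 12 characters, about the size of a YaCy hash
        System.out.println("default charset lookup: " + run(s, false) + " ms");
        System.out.println("cached Charset object : " + run(s, true) + " ms");
    }

    private static long run(final String s, final boolean cached) throws InterruptedException {
        final Thread[] threads = new Thread[4];
        final long start = System.currentTimeMillis();
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread() {
                public void run() {
                    // encode the same string many times, with or without the cached Charset
                    for (int n = 0; n < 1000000; n++) {
                        if (cached) s.getBytes(UTF_8); else s.getBytes();
                    }
                }
            };
            threads[i].start();
        }
        for (final Thread t : threads) t.join();
        return System.currentTimeMillis() - start;
    }
}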

@@ -235,7 +235,7 @@ dc_rights

 public InputStream getText() {
 try {
-if (this.text == null) return new ByteArrayInputStream("".getBytes());
+if (this.text == null) return new ByteArrayInputStream(UTF8.getBytes(""));
 if (this.text instanceof String) {
 return new ByteArrayInputStream(UTF8.getBytes(((String) this.text)));
 } else if (this.text instanceof InputStream) {

@@ -252,7 +252,7 @@ dc_rights
 } catch (final Exception e) {
 Log.logException(e);
 }
-return new ByteArrayInputStream("".getBytes());
+return new ByteArrayInputStream(UTF8.getBytes(""));
 }

 public byte[] getTextBytes() {

@@ -36,6 +36,7 @@ import java.util.Iterator;
 import java.util.LinkedList;

 import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.UTF8;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Parser;

@@ -199,7 +200,7 @@ public class vcfParser extends AbstractParser implements Parser {
 }

 final String[] sections = parsedNames.toArray(new String[parsedNames.size()]);
-final byte[] text = parsedDataText.toString().getBytes();
+final byte[] text = UTF8.getBytes(parsedDataText.toString());
 return new Document[]{new Document(
 url, // url of the source document
 mimeType, // the documents mime type

@@ -227,7 +228,7 @@ public class vcfParser extends AbstractParser implements Parser {

 private String decodeQuotedPrintable(final String s) {
 if (s == null) return null;
-final byte[] b = s.getBytes();
+final byte[] b = UTF8.getBytes(s);
 final StringBuilder sb = new StringBuilder();
 for (int i = 0; i < b.length; i++) {
 final int c = b[i];

@@ -146,7 +146,7 @@ public class opensearchdescriptionReader extends DefaultHandler {
 Log.logWarning("opensearchdescriptionReader", "response=" + UTF8.String(a));
 return null;
 }
-if (!ByteBuffer.equals(a, "<?xml".getBytes())) {
+if (!ByteBuffer.equals(a, UTF8.getBytes("<?xml"))) {
 Log.logWarning("opensearchdescriptionReader", "response does not contain valid xml");
 return null;
 }