From dc0db3550ea599d3555eec915314671c8f0c04f4 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 11 Mar 2011 00:59:27 +0000 Subject: [PATCH] avoid string conversion git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7584 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/api/yacydoc.java | 2 +- source/de/anomic/crawler/ResultURLs.java | 2 +- source/de/anomic/data/YMarkTables.java | 2 +- source/de/anomic/search/Segment.java | 8 ++++---- .../data/citation/CitationReferenceRow.java | 4 ++-- .../yacy/kelondro/data/image/ImageReferenceRow.java | 4 ++-- source/net/yacy/kelondro/data/meta/URIMetadata.java | 2 +- .../net/yacy/kelondro/data/meta/URIMetadataRow.java | 10 +++++----- .../yacy/kelondro/data/word/WordReferenceRow.java | 13 ++++--------- source/net/yacy/kelondro/index/Row.java | 6 ++---- 10 files changed, 23 insertions(+), 30 deletions(-) diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index 32e4010dd..9ce089f2d 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -102,7 +102,7 @@ public class yacydoc { prop.putXML("dc_date", entry.moddate().toString()); prop.putXML("dc_type", String.valueOf(entry.doctype())); prop.putXML("dc_identifier", metadata.url().toNormalform(false, true)); - prop.putXML("dc_language", entry.language()); + prop.putXML("dc_language", UTF8.String(entry.language())); prop.put("yacy_urlhash", metadata.url().hash()); prop.putXML("yacy_loaddate", entry.loaddate().toString()); diff --git a/source/de/anomic/crawler/ResultURLs.java b/source/de/anomic/crawler/ResultURLs.java index 28c20c2d6..8bf7ef714 100644 --- a/source/de/anomic/crawler/ResultURLs.java +++ b/source/de/anomic/crawler/ResultURLs.java @@ -221,7 +221,7 @@ public final class ResultURLs { public static void main(final String[] args) { try { final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/"); - final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de", 0, 0, 0, 0, 0, 0); + final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de".getBytes(), 0, 0, 0, 0, 0, 0); EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING; System.out.println("valid test:\n======="); // add diff --git a/source/de/anomic/data/YMarkTables.java b/source/de/anomic/data/YMarkTables.java index 53c12d3a5..bf72d290f 100644 --- a/source/de/anomic/data/YMarkTables.java +++ b/source/de/anomic/data/YMarkTables.java @@ -429,7 +429,7 @@ public class YMarkTables { metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet())); metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount())); metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype())); - metadata.put(METADATA.LANGUAGE, urlEntry.language()); + metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language())); final URIMetadataRow.Components meta = urlEntry.metadata(); if (meta != null) { diff --git a/source/de/anomic/search/Segment.java b/source/de/anomic/search/Segment.java index 39563d592..915d9496d 100644 --- a/source/de/anomic/search/Segment.java +++ b/source/de/anomic/search/Segment.java @@ -218,13 +218,13 @@ public class Segment { Map.Entry wentry; String word; int len = (document == null) ? urlLength : document.dc_title().length(); - WordReferenceRow ientry = new WordReferenceRow(UTF8.String(url.hash()), + WordReferenceRow ientry = new WordReferenceRow(url.hash(), urlLength, urlComps, len, condenser.RESULT_NUMB_WORDS, condenser.RESULT_NUMB_SENTENCES, urlModified.getTime(), System.currentTimeMillis(), - language, + UTF8.getBytes(language), doctype, outlinksSame, outlinksOther); Word wprop; @@ -247,10 +247,10 @@ public class Segment { try { container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1); container.add(ientry); + rankingProcess.add(container, true, sourceName, -1); } catch (RowSpaceExceededException e) { continue; } - rankingProcess.add(container, true, sourceName, -1); } } if (rankingProcess != null) rankingProcess.oneFeederTerminated(); @@ -339,7 +339,7 @@ public class Segment { condenser.RESULT_NUMB_WORDS, // word count Response.docType(document.dc_format()), // doctype condenser.RESULT_FLAGS, // flags - language, // language + UTF8.getBytes(language), // language document.inboundLinks(), // inbound links document.outboundLinks(), // outbound links document.getAudiolinks().size(), // laudio diff --git a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java index 5ef1dcda7..70b4fad74 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java +++ b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java @@ -73,7 +73,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ { private final Row.Entry entry; public CitationReferenceRow( - final String urlHash, + final byte[] urlHash, final long lastmodified, // last-modified time of the document where word appears final long updatetime, // update time final int posintext, // occurrence of url; counts the url @@ -83,7 +83,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ { final int urlComps, // number of path components final byte typeofurl // outlinks to same domain ) { - assert (urlHash.length() == 12) : "urlhash = " + urlHash; + assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash); this.entry = citationRow.newEntry(); final int mddlm = MicroDate.microDateDays(lastmodified); final int mddct = MicroDate.microDateDays(updatetime); diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java index 42ebe6ad8..5a1c200ab 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java @@ -147,7 +147,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag this.entry.setCol(col_reserve2, 0); } - public ImageReferenceRow(final String urlHash, + public ImageReferenceRow(final byte[] urlHash, final int urlLength, // byte-length of complete URL final int urlComps, // number of path components final int titleLength, // length of description/length (longer are better?) @@ -160,7 +160,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag final int outlinksSame, // outlinks to same domain final int outlinksOther // outlinks to other domain ) { - assert (urlHash.length() == 12) : "urlhash = " + urlHash; + assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash); this.entry = urlEntryRow.newEntry(); final int mddlm = MicroDate.microDateDays(lastmodified); final int mddct = MicroDate.microDateDays(updatetime); diff --git a/source/net/yacy/kelondro/data/meta/URIMetadata.java b/source/net/yacy/kelondro/data/meta/URIMetadata.java index 827628c39..fe1b41cfa 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadata.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadata.java @@ -54,7 +54,7 @@ public interface URIMetadata { public char doctype(); - public String language(); + public byte[] language(); public int size(); diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 2a8a2b0e7..07797ad91 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -68,7 +68,7 @@ public class URIMetadataRow implements URIMetadata { "Cardinal wc-3 {b256}, " + // size of file by number of words; for video and audio: seconds "byte[] dt-1, " + // doctype, taken from extension or any other heuristic "Bitfield flags-4, " + // flags; any stuff (see Word-Entity definition) - "String lang-2, " + // language + "byte[] lang-2, " + // language "Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width "Cardinal lother-2 {b256}, " + // # of outlinks to outside domain; for video and image: height "Cardinal limage-2 {b256}, " + // # of embedded image links @@ -130,7 +130,7 @@ public class URIMetadataRow implements URIMetadata { final int wc, final char dt, final Bitfield flags, - final String lang, + final byte[] lang, final int llocal, final int lother, final int laudio, @@ -150,7 +150,7 @@ public class URIMetadataRow implements URIMetadata { this.entry.setCol(col_wc, wc); this.entry.setCol(col_dt, new byte[]{(byte) dt}); this.entry.setCol(col_flags, flags.bytes()); - this.entry.setCol(col_lang, UTF8.getBytes(lang)); + this.entry.setCol(col_lang, lang); this.entry.setCol(col_llocal, llocal); this.entry.setCol(col_lother, lother); this.entry.setCol(col_limage, limage); @@ -409,8 +409,8 @@ public class URIMetadataRow implements URIMetadata { return (char) entry.getColByte(col_dt); } - public String language() { - return this.entry.getColString(col_lang); + public byte[] language() { + return this.entry.getColBytes(col_lang, true); } public int size() { diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index c3f8ccfa8..728730c94 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -162,7 +162,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.entry.setCol(col_reserve2, 0); } - public WordReferenceRow(final String urlHash, + public WordReferenceRow(final byte[] urlHash, final int urlLength, // byte-length of complete URL final int urlComps, // number of path components final int titleLength, // length of description/length (longer are better?) @@ -170,12 +170,12 @@ public final class WordReferenceRow extends AbstractReference implements WordRef final int phrasecount, // total number of phrases final long lastmodified, // last-modified time of the document where word appears final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - final String language, // (guessed) language of document + final byte[] language, // (guessed) language of document final char doctype, // type of document final int outlinksSame, // outlinks to same domain final int outlinksOther // outlinks to other domain ) { - assert (urlHash.length() == 12) : "urlhash = " + urlHash; + assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash); this.entry = urlEntryRow.newEntry(); final int mddlm = MicroDate.microDateDays(lastmodified); final int mddct = MicroDate.microDateDays(updatetime); @@ -186,7 +186,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.entry.setCol(col_wordsInText, wordcount); this.entry.setCol(col_phrasesInText, phrasecount); this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); - this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language); + this.entry.setCol(col_language, ((language == null) || (language.length != urlEntryRow.width(col_language))) ? WordReferenceVars.default_language : language); this.entry.setCol(col_llocal, outlinksSame); this.entry.setCol(col_lother, outlinksOther); this.entry.setCol(col_urlLength, urlLength); @@ -204,11 +204,6 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.entry.setCol(col_posofphrase, word.numOfPhrase); } - public WordReferenceRow(final String urlHash, final String code) { - // the code is the external form of the row minus the leading urlHash entry - this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code))); - } - public WordReferenceRow(final String external) { this.entry = urlEntryRow.newEntry(external, true); } diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java index a5eca3794..d7001ffd1 100644 --- a/source/net/yacy/kelondro/index/Row.java +++ b/source/net/yacy/kelondro/index/Row.java @@ -478,10 +478,8 @@ public final class Row { } public final String getColString(final int column) { - return getColString(colstart[column], row[column].cellwidth); - } - - private final String getColString(final int clstrt, int length) { + final int clstrt = colstart[column]; + int length = row[column].cellwidth; if (rowinstance[offset + clstrt] == 0) return null; assert length <= rowinstance.length - offset - clstrt; if (length > rowinstance.length - offset - clstrt) length = rowinstance.length - offset - clstrt;