From 5b3acc12cd4b4343c4e7d7f0a20a1da8ea8d5f6a Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 17 May 2012 03:55:10 +0200 Subject: [PATCH 1/7] Pattern.quote() replaces \\Q and \\E according to publication in http://www.cs.washington.edu/homes/mernst/pubs/regex-types-ftfjp2012.pdf --- htroot/Load_RSS_p.java | 9 +++++---- source/de/anomic/data/ymark/YMarkCrawlStart.java | 7 +++---- source/de/anomic/data/ymark/YMarkTables.java | 10 ++-------- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java index 8f9511765..3209f8058 100644 --- a/htroot/Load_RSS_p.java +++ b/htroot/Load_RSS_p.java @@ -1,5 +1,5 @@ /** - * oad_RSS_p + * Load_RSS_p * Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany * First released 20.08.2010 at http://yacy.net * @@ -26,6 +26,7 @@ import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; import net.yacy.cora.document.Hit; import net.yacy.cora.document.RSSFeed; @@ -83,7 +84,7 @@ public class Load_RSS_p { if (messageurl.length() == 0) continue; final byte[] api_pk = row.get("api_pk"); final Row r = api_pk == null ? null : sb.tables.select("api", api_pk); - if (r == null || !r.get("comment", "").matches(".*\\Q" + messageurl + "\\E.*")) { + if (r == null || !r.get("comment", "").matches(".*" + Pattern.quote(messageurl) + ".*")) { d.add(row.getPK()); } } @@ -125,7 +126,7 @@ public class Load_RSS_p { if (messageurl.length() == 0) continue; final byte[] api_pk = row.get("api_pk"); final Row r = api_pk == null ? 
null : sb.tables.select("api", api_pk); - if (r != null && r.get("comment", "").matches(".*\\Q" + messageurl + "\\E.*")) { + if (r != null && r.get("comment", "").matches(".*" + Pattern.quote(messageurl) + ".*")) { d.add(row.getPK()); } } @@ -194,7 +195,7 @@ public class Load_RSS_p { // check if feed is registered in scheduler final byte[] api_pk = row.get("api_pk"); final Row r = api_pk == null ? null : sb.tables.select("api", api_pk); - if (r != null && r.get("comment", "").matches(".*\\Q" + messageurl + "\\E.*")) { + if (r != null && r.get("comment", "").matches(".*" + Pattern.quote(messageurl) + ".*")) { // this is a recorded entry final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK())); diff --git a/source/de/anomic/data/ymark/YMarkCrawlStart.java b/source/de/anomic/data/ymark/YMarkCrawlStart.java index 650918f3a..c5f7d8ac8 100644 --- a/source/de/anomic/data/ymark/YMarkCrawlStart.java +++ b/source/de/anomic/data/ymark/YMarkCrawlStart.java @@ -119,10 +119,9 @@ public class YMarkCrawlStart extends HashMap{ private void load() { try { final StringBuilder buffer = new StringBuilder(500); - //buffer.append("^.*crawlingURL=\\Q"); - buffer.append("^crawl start for \\Q"); - buffer.append(url); - buffer.append("\\E?.*"); + buffer.append("^crawl start for "); + buffer.append(Pattern.quote(url)); + buffer.append("?.*"); final Pattern pattern = Pattern.compile(buffer.toString()); //final Iterator APIcalls = this.worktables.iterator(WorkTables.TABLE_API_NAME, WorkTables.TABLE_API_COL_URL, pattern); final Iterator APIcalls = this.worktables.iterator(WorkTables.TABLE_API_NAME, WorkTables.TABLE_API_COL_COMMENT, pattern); diff --git a/source/de/anomic/data/ymark/YMarkTables.java b/source/de/anomic/data/ymark/YMarkTables.java index d5add5431..b14455733 100644 --- a/source/de/anomic/data/ymark/YMarkTables.java +++ b/source/de/anomic/data/ymark/YMarkTables.java @@ -92,8 
+92,6 @@ public class YMarkTables { public final static String ADMIN_AUTHENTICATE_MSG = "Admin authentication required!"; public final static String p1 = "(?:^|.*,)"; - public final static String p2 = "\\Q"; - public final static String p3 = "\\E"; public final static String p4 = "(?:,.*|$)"; public final static String p5 = "((?:"; public final static String p6 = ")(?:,.*|$)){"; @@ -194,9 +192,7 @@ public class YMarkTables { patternBuilder.setLength(0); patternBuilder.append(p1); patternBuilder.append('('); - patternBuilder.append(p2); - patternBuilder.append(folder); - patternBuilder.append(p3); + patternBuilder.append(Pattern.quote(folder)); patternBuilder.append(')'); patternBuilder.append(p4); final Pattern p = Pattern.compile(patternBuilder.toString()); @@ -210,9 +206,7 @@ public class YMarkTables { patternBuilder.append(p1); patternBuilder.append(p5); for (final String tag : tagArray) { - patternBuilder.append(p2); - patternBuilder.append(tag); - patternBuilder.append(p3); + patternBuilder.append(Pattern.quote(tag)); patternBuilder.append('|'); } patternBuilder.deleteCharAt(patternBuilder.length()-1); From 7eece0256f42156053c4ccd744919ec3cabc2d2d Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 17 May 2012 04:26:03 +0200 Subject: [PATCH 2/7] moved yacy.logging to defaults according to request in http://bugs.yacy.net/view.php?id=55 --- yacy.logging => defaults/yacy.logging | 0 source/net/yacy/yacy.java | 2 +- .../de/Freizeitaktivitäten.vocabulary | 27 ------------------- 3 files changed, 1 insertion(+), 28 deletions(-) rename yacy.logging => defaults/yacy.logging (100%) delete mode 100644 vocabularies/de/Freizeitaktivitäten.vocabulary diff --git a/yacy.logging b/defaults/yacy.logging similarity index 100% rename from yacy.logging rename to defaults/yacy.logging diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 85ef9bc8b..499833531 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -174,7 +174,7 @@ 
public final class yacy { f = new File(dataHome, "DATA/LOG/"); mkdirsIfNeseccary(f); f = new File(dataHome, "DATA/LOG/yacy.logging"); - final File f0 = new File(appHome, "yacy.logging"); + final File f0 = new File(appHome, "defaults/yacy.logging"); if (!f.exists() || f0.lastModified() > f.lastModified()) try { FileUtils.copy(f0, f); } catch (final IOException e){ diff --git a/vocabularies/de/Freizeitaktivitäten.vocabulary b/vocabularies/de/Freizeitaktivitäten.vocabulary deleted file mode 100644 index d26af3e8a..000000000 --- a/vocabularies/de/Freizeitaktivitäten.vocabulary +++ /dev/null @@ -1,27 +0,0 @@ -Eislaufen -Drachensteigen -Saunabaden=Sauna -Spazieren und Wandern=Spazieren,Wandern,Park,Ufer -Skaten=Inline,Inliner -Skateboarden=Skateboard -Museumsbesuch=Museum -Altstadtbesuch=Altstadt -Parkbesuch=Park,Zoo,Freilichtmuseum,Burg -Ausflug=Schifstouren,Schifffahrt,Aussicht,Aussichtspunkt,Geocaching -Schwimmen=Schwimmbad,Freibad,Hallenbad,Schwimmbäder,Freibäder,Hallenbäder,Therme -Fastnacht -Eislaufen -Saunabaden=Sauna -Theater=Bühnen -Oper=Operette,Opernhaus -Spielplätze=Spielplatz -Museum -Camping -Picknick=Picknicken -Essen und Trinken=Essen,Trinken,Bar,Restaurant,Kneipe -Kochen -Singen=Gesang,Chor -Kegeln -Bowling -Kartbahn=Kart -Kino=Kinoprogramm,Cine \ No newline at end of file From 52d307c735af8f173dc595b6d4411502ecddf03e Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 17 May 2012 05:18:52 +0200 Subject: [PATCH 3/7] prevent the snippet fetch process from removing catchall entries --- source/net/yacy/search/index/Segment.java | 4 ++-- source/net/yacy/search/query/SnippetProcess.java | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index d4ae0a852..cb2abc0a3 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -72,8 +72,8 @@ import de.anomic.crawler.retrieval.Response; public class
Segment { // catchall word - final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents - final static byte[] catchallHash; + public final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents + public final static byte[] catchallHash; final static Word catchallWord = new Word(0, 0, 0); static { catchallHash = Word.word2hash(catchallString); // "KZzU-Vf6h5k-" diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index 9eeb816d0..028450ab9 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -62,6 +62,7 @@ import org.apache.solr.common.SolrDocumentList; import de.anomic.crawler.Cache; import de.anomic.data.WorkTables; +import net.yacy.search.index.Segment; import net.yacy.search.index.SolrField; public class SnippetProcess { @@ -606,6 +607,10 @@ public class SnippetProcess { return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet } else { // problems with snippet fetch + if (this.snippetFetchWordHashes.has(Segment.catchallHash)) { + // we accept that because the word cannot be on the page + return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); + } final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); if (this.deleteIfSnippetFail) { this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason); From 5f983faef9fc08c94c95023ad387d9d6d40ed5c4 Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Thu, 17 May 2012 05:49:25 +0200 Subject: [PATCH 4/7] No & in JavaScript-embedded URLs, added ability to stop focus in ConfigPortal.html preview (is this not
secured with _p????) Conflicts: htroot/yacyinteractive.java htroot/yacysearch.java --- htroot/index.html | 1 - htroot/yacyinteractive.java | 4 ++-- htroot/yacysearch.html | 2 +- htroot/yacysearch.java | 13 +++++++++---- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/htroot/index.html b/htroot/index.html index 277270f8e..ed78a76b5 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -1,4 +1,3 @@ - #(forward)#::#(/forward)# diff --git a/htroot/yacyinteractive.java b/htroot/yacyinteractive.java index 01696fe54..959dbec48 100644 --- a/htroot/yacyinteractive.java +++ b/htroot/yacyinteractive.java @@ -54,9 +54,9 @@ public class yacyinteractive { prop.put("startRecord", startRecord); prop.put("maximumRecords", maximumRecords); prop.putHTML("querys", query.replaceAll(" ", "+")); - prop.put("serverlist", query.length() == 0 ? 1 : 0); + prop.put("serverlist", query.isEmpty() ? 1 : 0); prop.put("focus", focus ? 1 : 0); prop.put("allowrealtime", sb.indexSegments.URLCount() < 100000 ? 1 : 0); return prop; } -} \ No newline at end of file +} diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index ade1b8ed6..d1c5fc76d 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -252,4 +252,4 @@ window.setTimeout('latestinfo();',10000); - \ No newline at end of file + diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 617e6a495..ff82313b9 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -28,6 +28,7 @@ // if the shell's current path is HTROOT import java.io.IOException; +import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -120,9 +121,8 @@ public class yacysearch { prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0); //get focus option - final boolean focus = (post == null) ? true : post.get("focus", "1").equals("1"); - prop.put("focus", focus ? 1 : 0); - + prop.put("focus", ((post == null) ? 
true : post.get("focus", "1").equals("1")) ? 1 : 0); + // produce vocabulary navigation sidebars Collection vocabularies = LibraryProvider.autotagging.getVocabularies(); int j = 0; @@ -1104,7 +1104,12 @@ public class yacysearch { prop.putXML("rss_query", originalquerystring); prop.putXML("rss_queryenc", originalquerystring.replace(' ', '+')); - sb.localSearchLastAccess = System.currentTimeMillis(); + sb.localSearchLastAccess = System.currentTimeMillis(); + + // hostname and port (assume locahost if nothing helps) + final InetAddress hostIP = Domains.myPublicLocalIP(); + prop.put("myhost", hostIP != null ? hostIP.getHostAddress() : "localhost"); + prop.put("myport", serverCore.getPortNr(sb.getConfig("port", "8090"))); // return rewrite properties return prop; From fbb946f913c9cda7304ea4fbb5c67191a895e67d Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Tue, 15 May 2012 20:57:38 +0200 Subject: [PATCH 5/7] Made a method static (Eclipse suggested it), removed unused import, pk=null check does now output a warning in logfile --- htroot/Banner.java | 1 - source/net/yacy/kelondro/table/Table.java | 6 +++++- source/net/yacy/search/index/SolrConfiguration.java | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/htroot/Banner.java b/htroot/Banner.java index 9fad6e8d3..15281c827 100644 --- a/htroot/Banner.java +++ b/htroot/Banner.java @@ -34,7 +34,6 @@ import javax.imageio.ImageIO; import net.yacy.cora.protocol.RequestHeader; import net.yacy.peers.Seed; import net.yacy.peers.graphics.BannerData; -import net.yacy.peers.graphics.NetworkGraph; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.visualization.RasterPlotter; diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java index 366f387d4..3e0e94999 100644 --- a/source/net/yacy/kelondro/table/Table.java +++ b/source/net/yacy/kelondro/table/Table.java @@ -690,7 +690,11 @@ public class Table implements Index, 
Iterable { } this.file.put(i, p, 0); byte[] pk = lr.getPrimaryKeyBytes(); - if (pk == null) continue; + if (pk == null) { + // Table file might be corrupt + Log.logWarning("TABLE", "Possible corruption found in table " + this.filename() + " detected. i=" + i + ",p=" + p); + continue; + } this.index.put(pk, i); break; } diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 563684aaa..8089cf7a0 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -466,7 +466,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable * @param rel * @return binary encoded information about rel */ - private int relEval(final String[] rel) { + private static int relEval(final String[] rel) { int i = 0; for (final String s: rel) { final String s0 = s.toLowerCase().trim(); From b3ae2aa41f850bff8fd9281c8872a7a181e59deb Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Thu, 17 May 2012 06:00:49 +0200 Subject: [PATCH 6/7] With or without 'final'? 
At least please try it in other methods Conflicts: source/de/anomic/tools/tarTools.java --- source/de/anomic/tools/tarTools.java | 6 +++--- .../net/yacy/cora/document/Classification.java | 2 +- .../net/yacy/search/index/DocumentMetadata.java | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/source/de/anomic/tools/tarTools.java b/source/de/anomic/tools/tarTools.java index 043adf698..18c573635 100644 --- a/source/de/anomic/tools/tarTools.java +++ b/source/de/anomic/tools/tarTools.java @@ -89,8 +89,8 @@ public class tarTools { } Log.logInfo("UNTAR", "finished"); } - - public static void main(final String args[]){ + + public static void main(final String args[]) { // @arg0 source // @arg1 destination if(args.length == 2){ @@ -103,4 +103,4 @@ public class tarTools { System.out.println("usage: "); } } -} \ No newline at end of file +} diff --git a/source/net/yacy/cora/document/Classification.java b/source/net/yacy/cora/document/Classification.java index f0caad698..8f81ff04f 100644 --- a/source/net/yacy/cora/document/Classification.java +++ b/source/net/yacy/cora/document/Classification.java @@ -129,7 +129,7 @@ public class Classification { return appsExtSet.contains(appsExt.trim().toLowerCase()); } - public static ContentDomain getContentDomain(String ext) { + public static ContentDomain getContentDomain(final String ext) { if (isTextExtension(ext)) return ContentDomain.TEXT; if (isImageExtension(ext)) return ContentDomain.IMAGE; if (isAudioExtension(ext)) return ContentDomain.AUDIO; diff --git a/source/net/yacy/search/index/DocumentMetadata.java b/source/net/yacy/search/index/DocumentMetadata.java index b3cce470b..cefeb8359 100644 --- a/source/net/yacy/search/index/DocumentMetadata.java +++ b/source/net/yacy/search/index/DocumentMetadata.java @@ -62,37 +62,37 @@ public class DocumentMetadata implements Metadata { } @Override - public URIMetadata load(Element obrwi) { + public URIMetadata load(final Element obrwi) { // TODO 
Auto-generated method stub return null; } @Override - public URIMetadata load(byte[] urlHash) { + public URIMetadata load(final byte[] urlHash) { // TODO Auto-generated method stub return null; } @Override - public void store(URIMetadata entry) throws IOException { + public void store(final URIMetadata entry) throws IOException { // TODO Auto-generated method stub } @Override - public boolean remove(byte[] urlHashBytes) { + public boolean remove(final byte[] urlHashBytes) { // TODO Auto-generated method stub return false; } @Override - public boolean exists(byte[] urlHash) { + public boolean exists(final byte[] urlHash) { // TODO Auto-generated method stub return false; } @Override - public CloneableIterator keys(boolean up, byte[] firstKey) { + public CloneableIterator keys(final boolean up, final byte[] firstKey) { // TODO Auto-generated method stub return null; } @@ -104,14 +104,14 @@ public class DocumentMetadata implements Metadata { } @Override - public CloneableIterator entries(boolean up, String firstHash) + public CloneableIterator entries(final boolean up, final String firstHash) throws IOException { // TODO Auto-generated method stub return null; } @Override - public int deleteDomain(String hosthash) throws IOException { + public int deleteDomain(final String hosthash) throws IOException { // TODO Auto-generated method stub return 0; } From d10627d5912281ba363b81db270b25b2aedac4c0 Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Thu, 17 May 2012 06:03:18 +0200 Subject: [PATCH 7/7] More sync in close() methods Conflicts: source/net/yacy/kelondro/logging/GuiHandler.java source/net/yacy/kelondro/workflow/InstantBusyThread.java --- source/net/yacy/kelondro/logging/GuiHandler.java | 4 ++-- .../yacy/kelondro/workflow/InstantBusyThread.java | 12 +++++++++++- source/net/yacy/search/index/DocumentMetadata.java | 2 +- source/net/yacy/search/index/DocumentReference.java | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git 
a/source/net/yacy/kelondro/logging/GuiHandler.java b/source/net/yacy/kelondro/logging/GuiHandler.java index f88695383..70d4df144 100644 --- a/source/net/yacy/kelondro/logging/GuiHandler.java +++ b/source/net/yacy/kelondro/logging/GuiHandler.java @@ -208,8 +208,8 @@ public class GuiHandler extends Handler { } - public void close() throws SecurityException { - + public synchronized void close() throws SecurityException { + // Nothing implement here } } diff --git a/source/net/yacy/kelondro/workflow/InstantBusyThread.java b/source/net/yacy/kelondro/workflow/InstantBusyThread.java index b5d48328f..7cbbcd51c 100644 --- a/source/net/yacy/kelondro/workflow/InstantBusyThread.java +++ b/source/net/yacy/kelondro/workflow/InstantBusyThread.java @@ -179,7 +179,17 @@ public final class InstantBusyThread extends AbstractBusyThread implements BusyT public static WorkflowThread oneTimeJob(final Runnable thread, final long startupDelay, final int maxJobs) { while (instantThreadCounter >= maxJobs) try {Thread.sleep(100);} catch (final InterruptedException e) {break;} - return oneTimeJob( thread, startupDelay); + return oneTimeJob( thread, startupDelay); + } + + @Override + public void open() { + // Not implemented in this thread + } + + @Override + public synchronized void close() { + // Not implemented in this thread } } diff --git a/source/net/yacy/search/index/DocumentMetadata.java b/source/net/yacy/search/index/DocumentMetadata.java index cefeb8359..56f4f773f 100644 --- a/source/net/yacy/search/index/DocumentMetadata.java +++ b/source/net/yacy/search/index/DocumentMetadata.java @@ -50,7 +50,7 @@ public class DocumentMetadata implements Metadata { } @Override - public void close() { + public synchronized void close() { // TODO Auto-generated method stub } diff --git a/source/net/yacy/search/index/DocumentReference.java b/source/net/yacy/search/index/DocumentReference.java index 51906c65e..0f2b7c1d1 100644 --- a/source/net/yacy/search/index/DocumentReference.java +++ 
b/source/net/yacy/search/index/DocumentReference.java @@ -42,7 +42,7 @@ public class DocumentReference { return data.size(); } - public void close() { + public synchronized void close() { if (data != null) { data.close(); }