diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java index 78aeffe71..06cce4ad0 100644 --- a/htroot/CrawlProfileEditor_p.java +++ b/htroot/CrawlProfileEditor_p.java @@ -101,7 +101,10 @@ public class CrawlProfileEditor_p { labels.add(new eentry(CrawlProfile.XPSTOPW, "Parent stop-words", false, eentry.BOOLEAN)); } - public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { + public static serverObjects respond( + final RequestHeader header, + final serverObjects post, + final serverSwitch env) { final servletProperties prop = new servletProperties(); final Switchboard sb = (Switchboard)env; @@ -131,7 +134,7 @@ public class CrawlProfileEditor_p { // generate handle list: first sort by handle name CrawlProfile selentry; - Map orderdHandles = new TreeMap(); + final Map orderdHandles = new TreeMap(); for (final byte[] h : sb.crawler.getActive()) { selentry = sb.crawler.getActive(h); if (selentry != null && !ignoreNames.contains(selentry.name())) { @@ -219,7 +222,8 @@ public class CrawlProfileEditor_p { prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_label", ee.label); prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type", ee.type); if (ee.type == eentry.BOOLEAN) { - prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type_checked", Boolean.parseBoolean(val) ? "1" : "0"); + prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type_checked", + Boolean.parseBoolean(val) ? "1" : "0"); } else { prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type_value", val); } @@ -231,7 +235,14 @@ public class CrawlProfileEditor_p { return prop; } - private static void putProfileEntry(final servletProperties prop, final CrawlStacker crawlStacker, final CrawlProfile profile, final boolean active, final boolean dark, final int count, final int domlistlength) { + private static void putProfileEntry( + final servletProperties prop, + final CrawlStacker crawlStacker, + final CrawlProfile profile, + final boolean active, + final boolean dark, + final int count, + final int domlistlength) { prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0"); prop.put(CRAWL_PROFILE_PREFIX + count + "_name", profile.name()); @@ -247,13 +258,13 @@ public class CrawlProfileEditor_p { prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingIfOlder", (profile.recrawlIfOlder() == 0L) ? 
"no re-crawl" : DateFormat.getDateTimeInstance().format(profile.recrawlIfOlder())); prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterDepth", "inactive"); - // start contrib [MN] int i = 0; - if (active && profile.domMaxPages() > 0 && profile.domMaxPages() != Integer.MAX_VALUE) { + if (active && profile.domMaxPages() > 0 + && profile.domMaxPages() != Integer.MAX_VALUE) { String item; - while (i <= domlistlength && !"".equals(item = crawlStacker.domName(true, i))){ + while (i <= domlistlength && !(item = crawlStacker.domName(true, i)).isEmpty()){ if (i == domlistlength) { - item = item + " ..."; + item += " ..."; } prop.putHTML(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterContent_" + i + "_item", item); i++; diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index ea1bc6fb0..b574183e6 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -4,7 +4,10 @@ // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2004 -// last major change: 31.08.2010 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -63,6 +66,26 @@ public class CrawlProfile extends ConcurrentHashMap implements M private Pattern mustmatch = null, mustnotmatch = null; + /** + * Constructor which creates CrawlPofile from parameters. + * @param name name of the crawl profile + * @param startURL root URL of the crawl + * @param mustmatch URLs which do not match this regex will be ignored + * @param mustnotmatch URLs which match this regex will be ignored + * @param depth height of the tree which will be created by the crawler + * @param recrawlIfOlder documents which have been indexed in the past will + * be indexed again if they are older than the time (ms) in this parameter + * @param domMaxPages maximum number from one domain which will be indexed + * @param crawlingQ true if URLs containing questionmarks shall be indexed + * @param indexText true if text content of URL shall be indexed + * @param indexMedia true if media content of URL shall be indexed + * @param storeHTCache true if content chall be kept in cache after indexing + * @param remoteIndexing true if part of the crawl job shall be distributed + * @param xsstopw true if static stop words shall be ignored + * @param xdstopw true if dynamic stop words shall be ignored + * @param xpstopw true if parent stop words shall be ignored + * @param cacheStrategy determines if and how cache is used loading content + */ public CrawlProfile( final String name, final DigestURI startURL, @@ -81,8 +104,12 @@ public class CrawlProfile extends ConcurrentHashMap implements M final boolean xpstopw, final CacheStrategy cacheStrategy) { super(40); - if (name == null || name.length() == 0) throw new NullPointerException("name must not be null"); - final String handle = (startURL == null) ? Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength) : ASCII.String(startURL.hash()); + if (name == null || name.isEmpty()) { + throw new NullPointerException("name must not be null or empty"); + } + final String handle = (startURL == null) + ? 
Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength) + : ASCII.String(startURL.hash()); put(HANDLE, handle); put(NAME, name); put(START_URL, (startURL == null) ? "" : startURL.toNormalform(true, false)); @@ -102,37 +129,75 @@ public class CrawlProfile extends ConcurrentHashMap implements M put(CACHE_STRAGEGY, cacheStrategy.toString()); } + /** + * Constructor which creates a CrawlProfile from values in a Map. + * @param ext Map which contains the values + */ public CrawlProfile(final Map ext) { super(ext == null ? 1 : ext.size()); if (ext != null) putAll(ext); } - - public void put(final String key, final boolean value) { + + /** + * Adds a parameter to CrawlProfile. + * @param key name of the parameter + * @param value value of the parameter + */ + public final void put(final String key, final boolean value) { super.put(key, Boolean.toString(value)); } - public void put(final String key, final int value) { + /** + * Adds a parameter to CrawlProfile. + * @param key name of the parameter + * @param value value of the parameter + */ + public final void put(final String key, final int value) { super.put(key, Integer.toString(value)); } - public void put(final String key, final long value) { + /** + * Adds a parameter to CrawlProfile. + * @param key name of the parameter + * @param value value of the parameter + */ + public final void put(final String key, final long value) { super.put(key, Long.toString(value)); } + /** + * Gets the handle of the CrawlProfile. + * @return handle of the profile + */ public String handle() { final String r = get(HANDLE); //if (r == null) return null; return r; } + + /** + * Gets the name of the CrawlProfile. + * @return name of the profile + */ public String name() { final String r = get(NAME); if (r == null) return ""; return r; } + + /** + * Gets the root URL of the crawl job. + * @return root URL + */ public String startURL() { final String r = get(START_URL); return r; } + + /** + * Gets the regex which must be matched by URLs in order to be crawled. + * @return regex which must be matched + */ public Pattern mustMatchPattern() { if (this.mustmatch == null) { String r = get(FILTER_MUSTMATCH); @@ -141,6 +206,11 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.mustmatch; } + + /** + * Gets the regex which must not be matched by URLs in order to be crawled. + * @return regex which must not be matched + */ public Pattern mustNotMatchPattern() { if (this.mustnotmatch == null) { String r = get(FILTER_MUSTNOTMATCH); @@ -149,6 +219,12 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.mustnotmatch; } + + /** + * Gets depth of crawl job (or height of the tree which will be + * created by the crawler). + * @return depth of crawl job + */ public int depth() { final String r = get(DEPTH); if (r == null) return 0; @@ -159,6 +235,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M return 0; } } + public CacheStrategy cacheStrategy() { final String r = get(CACHE_STRAGEGY); if (r == null) return CacheStrategy.IFEXIST; @@ -169,9 +246,15 @@ public class CrawlProfile extends ConcurrentHashMap implements M return CacheStrategy.IFEXIST; } } + public void setCacheStrategy(final CacheStrategy newStrategy) { put(CACHE_STRAGEGY, newStrategy.toString()); } + + /** + * Gets the minimum age that an entry must have to be re-crawled. 
+ * @return time in ms + */ public long recrawlIfOlder() { // returns a long (millis) that is the minimum age that // an entry must have to be re-crawled @@ -185,6 +268,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M return 0L; } } + public int domMaxPages() { // this is the maximum number of pages that are crawled for a single domain // if -1, this means no limit @@ -199,26 +283,31 @@ public class CrawlProfile extends ConcurrentHashMap implements M return Integer.MAX_VALUE; } } + public boolean crawlingQ() { final String r = get(CRAWLING_Q); if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } + public boolean pushSolr() { final String r = get(PUSH_SOLR); if (r == null) return true; return (r.equals(Boolean.TRUE.toString())); } + public boolean indexText() { final String r = get(INDEX_TEXT); if (r == null) return true; return (r.equals(Boolean.TRUE.toString())); } + public boolean indexMedia() { final String r = get(INDEX_MEDIA); if (r == null) return true; return (r.equals(Boolean.TRUE.toString())); } + public boolean storeHTCache() { final String r = get(STORE_HTCACHE); if (r == null) return false; @@ -229,16 +318,19 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } + public boolean excludeStaticStopwords() { final String r = get(XSSTOPW); if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } + public boolean excludeDynamicStopwords() { final String r = get(XDSTOPW); if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } + public boolean excludeParentStopwords() { final String r = get(XPSTOPW); if (r == null) return false; diff --git a/source/de/anomic/crawler/CrawlSwitchboard.java b/source/de/anomic/crawler/CrawlSwitchboard.java index 11be8b7c4..a2f4d2ab7 100644 --- a/source/de/anomic/crawler/CrawlSwitchboard.java +++ b/source/de/anomic/crawler/CrawlSwitchboard.java @@ -39,6 +39,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.kelondroException; +import net.yacy.repository.RegexHelper; public final class CrawlSwitchboard { @@ -52,6 +53,7 @@ public final class CrawlSwitchboard { public static final String DBFILE_ACTIVE_CRAWL_PROFILES = "crawlProfilesActive.heap"; public static final String DBFILE_PASSIVE_CRAWL_PROFILES = "crawlProfilesPassive.heap"; + public static final String DBFILE_INVALID_CRAWL_PROFILES = "crawlProfilesInvalid.heap"; public static final long CRAWL_PROFILE_PROXY_RECRAWL_CYCLE = 60L * 24L; public static final long CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE = 60L * 24L * 30L; @@ -61,7 +63,7 @@ public final class CrawlSwitchboard { public static final long CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE = 60L * 24L * 30L; private final Log log; - private Map> profilesActiveCrawls, profilesPassiveCrawls; + private Map> profilesActiveCrawls, profilesPassiveCrawls, profilesInvalidCrawls; public CrawlProfile defaultProxyProfile; public CrawlProfile defaultRemoteProfile; public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile; @@ -87,40 +89,37 @@ public final class CrawlSwitchboard { this.queuesRoot.mkdirs(); this.log.logConfig("Initializing Crawl Profiles"); + final File profilesInvalidFile = new File(queuesRoot, DBFILE_INVALID_CRAWL_PROFILES); + this.profilesInvalidCrawls = loadFromDB(profilesInvalidFile); + final File profilesActiveFile = new File(queuesRoot, 
DBFILE_ACTIVE_CRAWL_PROFILES); - try { - this.profilesActiveCrawls = new MapHeap(profilesActiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e) { - Log.logException(e);Log.logException(e); - FileUtils.deletedelete(profilesActiveFile); - try { - this.profilesActiveCrawls = new MapHeap(profilesActiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e1) { - Log.logException(e1); - this.profilesActiveCrawls = null; + this.profilesActiveCrawls = loadFromDB(profilesActiveFile); + for (final byte[] handle : this.profilesActiveCrawls.keySet()) { + final CrawlProfile p; + p = new CrawlProfile(this.profilesActiveCrawls.get(handle)); + if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_MUSTMATCH))) { + this.removeActive(handle); + this.putInvalid(handle, p); + Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name() + + " from active crawls since " + CrawlProfile.FILTER_MUSTMATCH + + " is not a valid regular expression: " + p.get(CrawlProfile.FILTER_MUSTMATCH)); + } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_MUSTNOTMATCH))) { + this.putInvalid(handle, p); + this.removeActive(handle); + Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name() + + " from active crawls since " + CrawlProfile.FILTER_MUSTNOTMATCH + + " is not a valid regular expression: " + p.get(CrawlProfile.FILTER_MUSTNOTMATCH)); + } else { + Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); } - } - for (final byte[] handle: this.profilesActiveCrawls.keySet()) { - final CrawlProfile p = new CrawlProfile(this.profilesActiveCrawls.get(handle)); - Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); + } initActiveCrawlProfiles(); log.logInfo("Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries"); final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES); - try { - this.profilesPassiveCrawls = new MapHeap(profilesPassiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e) { - Log.logException(e);Log.logException(e); - FileUtils.deletedelete(profilesActiveFile); - try { - this.profilesPassiveCrawls = new MapHeap(profilesPassiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e1) { - Log.logException(e1); - this.profilesPassiveCrawls = null; - } - } - for (final byte[] handle: this.profilesPassiveCrawls.keySet()) { + this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile); + for (final byte[] handle : this.profilesPassiveCrawls.keySet()) { final CrawlProfile p = new CrawlProfile(this.profilesPassiveCrawls.get(handle)); Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); } @@ -135,6 +134,13 @@ public final class CrawlSwitchboard { if (m == null) return null; return new CrawlProfile(m); } + + public CrawlProfile getInvalid(final byte[] profileKey) { + if (profileKey == null) return null; + final Map m = this.profilesInvalidCrawls.get(profileKey); + if (m == null) return null; + return new CrawlProfile(m); + } public CrawlProfile getPassive(final byte[] profileKey) { if (profileKey == null) return null; @@ -146,6 +152,10 @@ public final class CrawlSwitchboard { public Set getActive() { return this.profilesActiveCrawls.keySet(); } + + public Set getInvalid() { + 
return this.profilesInvalidCrawls.keySet(); + } public Set getPassive() { return this.profilesPassiveCrawls.keySet(); @@ -155,6 +165,11 @@ public final class CrawlSwitchboard { if (profileKey == null) return; this.profilesActiveCrawls.remove(profileKey); } + + public void removeInvalid(final byte[] profileKey) { + if (profileKey == null) return; + this.profilesInvalidCrawls.remove(profileKey); + } public void removePassive(final byte[] profileKey) { if (profileKey == null) return; @@ -164,6 +179,10 @@ public final class CrawlSwitchboard { public void putActive(final byte[] profileKey, final CrawlProfile profile) { this.profilesActiveCrawls.put(profileKey, profile); } + + public void putInvalid(final byte[] profileKey, final CrawlProfile profile) { + this.profilesInvalidCrawls.put(profileKey, profile); + } public void putPassive(final byte[] profileKey, final CrawlProfile profile) { this.profilesPassiveCrawls.put(profileKey, profile); @@ -302,7 +321,31 @@ public final class CrawlSwitchboard { public void close() { ((MapHeap) this.profilesActiveCrawls).close(); + ((MapHeap) this.profilesInvalidCrawls).close(); ((MapHeap) this.profilesPassiveCrawls).close(); } + + + /** + * Loads crawl profiles from a DB file. + * @param file DB file + * @return crawl profile data + */ + private Map> loadFromDB(final File file) { + Map> ret; + try { + ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); + } catch (final IOException e) { + Log.logException(e); + FileUtils.deletedelete(file); + try { + ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); + } catch (final IOException e1) { + Log.logException(e1); + ret = null; + } + } + return ret; + } } diff --git a/source/net/yacy/cora/services/federated/yacy/CacheStrategy.java b/source/net/yacy/cora/services/federated/yacy/CacheStrategy.java index 14067465d..e61783b8c 100644 --- a/source/net/yacy/cora/services/federated/yacy/CacheStrategy.java +++ b/source/net/yacy/cora/services/federated/yacy/CacheStrategy.java @@ -1,11 +1,43 @@ +// CacheStrategy.java +// ------------------------ +// part of YaCy +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2011 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + package net.yacy.cora.services.federated.yacy; public enum CacheStrategy { - NOCACHE(0), // never use the cache, all content from fresh internet source - IFFRESH(1), // use the cache if the cache exists and is fresh using the proxy-fresh rules - IFEXIST(2), // use the cache if the cache exist. Do no check freshness. Otherwise use online source. - CACHEONLY(3); // never go online, use all content from cache. 
If no cache entry exist, consider content nevertheless as available + /** Never use the cache, all content from fresh internet source. */ + NOCACHE(0), + /** Use the cache if the cache exists and is fresh using the + * proxy-fresh rules. */ + IFFRESH(1), + /** Use the cache if the cache exists. Do not check freshness. Otherwise + * use online source. */ + IFEXIST(2), + /** Never go online, use all content from cache. If no cache entry exists, + * consider content nevertheless as available. */ + CACHEONLY(3); // the fifth case may be that the CacheStrategy object is assigned NULL. That means that no snippet creation is wanted. public int code; @@ -14,6 +46,7 @@ public enum CacheStrategy { this.code = code; } + @Override public String toString() { return Integer.toString(this.code); } diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 7c0945774..5280df6cf 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -444,7 +444,7 @@ public class Blacklist { path = element.substring(slashPos + 1); } - if (!allowRegex || !isValidRegex(host)) { + if (!allowRegex || !RegexHelper.isValidRegex(host)) { final int i = host.indexOf('*'); // check whether host begins illegally @@ -470,33 +470,18 @@ public class Blacklist { if (host.indexOf("*", i + 1) > -1) { return BlacklistError.TWO_WILDCARDS_IN_HOST; } - } else if (allowRegex && !isValidRegex(host)) { + } else if (allowRegex && !RegexHelper.isValidRegex(host)) { return BlacklistError.HOST_REGEX; } // check for errors on regex-compiling path - if (!isValidRegex(path) && !"*".equals(path)) { + if (!RegexHelper.isValidRegex(path) && !"*".equals(path)) { return BlacklistError.PATH_REGEX; } return BlacklistError.NO_ERROR; } - /** - * Checks if a given expression is a valid regular expression. - * @param expression The expression to be checked. - * @return True if the expression is a valid regular expression, else false. 
- */ - private static boolean isValidRegex(final String expression) { - boolean ret = true; - try { - Pattern.compile(expression); - } catch (final PatternSyntaxException e) { - ret = false; - } - return ret; - } - public static String defaultBlacklist(final File listsPath) { final List dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER); if (dirlist.isEmpty()) { diff --git a/source/net/yacy/repository/FilterEngine.java b/source/net/yacy/repository/FilterEngine.java index 41d08e396..43032390e 100644 --- a/source/net/yacy/repository/FilterEngine.java +++ b/source/net/yacy/repository/FilterEngine.java @@ -45,6 +45,7 @@ public class FilterEngine { this.types = types; } + @Override public int compareTo(FilterEntry fe) { return this.path.compareToIgnoreCase(fe.path); } @@ -229,7 +230,7 @@ public class FilterEngine { path = element.substring(slashPos + 1); } - if (!allowRegex || !isValidRegex(host)) { + if (!allowRegex || !RegexHelper.isValidRegex(host)) { final int i = host.indexOf('*'); // check whether host begins illegally @@ -255,33 +256,16 @@ public class FilterEngine { if (host.indexOf("*", i + 1) > -1) { return ERR_TWO_WILDCARDS_IN_HOST; } - } else if (allowRegex && !isValidRegex(host)) { + } else if (allowRegex && !RegexHelper.isValidRegex(host)) { return ERR_HOST_REGEX; } // check for errors on regex-compiling path - if (!isValidRegex(path) && !path.equals("*")) { + if (!RegexHelper.isValidRegex(path) && !path.equals("*")) { return ERR_PATH_REGEX; } return 0; } - /** - * Checks if a given expression is a valid regular expression. - * @param expression The expression to be checked. - * @return True if the expression is a valid regular expression, else false. - */ - private static boolean isValidRegex(String expression) { - boolean ret = true; - try { - Pattern.compile(expression); - } catch (final PatternSyntaxException e) { - - ret = false; - } - return ret; - } - - } diff --git a/source/net/yacy/repository/RegexHelper.java b/source/net/yacy/repository/RegexHelper.java new file mode 100644 index 000000000..ff4c38bd6 --- /dev/null +++ b/source/net/yacy/repository/RegexHelper.java @@ -0,0 +1,52 @@ +// RegexHelper.java +// ------------------------ +// part of YaCy +// (C) by Marc Nause; marc.nause@gmx.de +// first published on http://www.anomic.de +// Braunchweig, Germany, 2011 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.repository; + +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + + +public final class RegexHelper { + + /** Private constructor to avoid instantiation of static class. */ + private RegexHelper() { } + + /** + * Checks if a given expression is a valid regular expression. 
+ * @param expression expression to be checked + * @return true if the expression is a valid regular expression, else false + */ + public static boolean isValidRegex(final String expression) { + boolean ret = true; + try { + Pattern.compile(expression); + } catch (final PatternSyntaxException e) { + ret = false; + } + return ret; + } + +}
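
For reference, a minimal usage sketch of the extracted helper; this is not part of the patch, and the class name RegexHelperDemo as well as the sample patterns are illustrative assumptions only.

// RegexHelperDemo.java - illustrative sketch, not part of this change set
package net.yacy.repository;

public final class RegexHelperDemo {

    public static void main(final String[] args) {
        // a well-formed pattern, e.g. a typical must-match filter, compiles and is accepted
        System.out.println(RegexHelper.isValidRegex(".*"));        // true

        // an unbalanced group cannot be compiled; the PatternSyntaxException is swallowed
        // and CrawlSwitchboard would move such a profile to the new invalid-profiles heap
        System.out.println(RegexHelper.isValidRegex("(unclosed")); // false

        // note: a null expression is not guarded; Pattern.compile(null) throws a
        // NullPointerException, matching the behaviour of the former private helpers
        // in Blacklist and FilterEngine
    }
}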