*) Invalid crawl profiles (those containing invalid mustmatch/mustnotmatch filters) will be moved from the active crawls to the invalid crawls (new file: DATA/INDEX/freeworld/QUEUES/crawlProfilesInvalid.heap). This file cannot be edited yet, but it should be easy to extend the CrawlProfileEditor accordingly (see the sketch below the commit metadata).

*) A corrupt crawlProfilesPassive.heap would cause crawlProfilesActive.heap to be deleted. I don't know if this ever happened, but it will not happen anymore.
*) Cleaned up a little bit.
*) Added some comments.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7827 6c8d7289-2bf4-0310-a012-ef5d649a1542
low012 2011-07-03 23:55:55 +00:00
parent b84089ff04
commit c7b95e8c81
7 changed files with 285 additions and 85 deletions
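A rough sketch of such a CrawlProfileEditor extension, built only on the new CrawlSwitchboard accessors added in this commit (getInvalid(), getInvalid(handle), removeInvalid(handle)); the helper name and the template property keys below are assumptions, not part of the commit:

// Sketch only: expose the profiles quarantined in crawlProfilesInvalid.heap to the
// servlet template. Property names ("invalidProfiles_...") are hypothetical.
private static void putInvalidProfiles(final servletProperties prop, final CrawlSwitchboard crawler) {
    int count = 0;
    for (final byte[] handle : crawler.getInvalid()) {
        final CrawlProfile p = crawler.getInvalid(handle);
        if (p == null) continue;
        prop.putHTML("invalidProfiles_" + count + "_name", p.name());
        prop.putHTML("invalidProfiles_" + count + "_handle", p.handle());
        prop.putHTML("invalidProfiles_" + count + "_mustmatch", p.get(CrawlProfile.FILTER_MUSTMATCH));
        prop.putHTML("invalidProfiles_" + count + "_mustnotmatch", p.get(CrawlProfile.FILTER_MUSTNOTMATCH));
        count++;
    }
    prop.put("invalidProfiles", count);
    // a delete action would only have to call crawler.removeInvalid(handle)
}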

CrawlProfileEditor_p.java

@ -101,7 +101,10 @@ public class CrawlProfileEditor_p {
labels.add(new eentry(CrawlProfile.XPSTOPW, "Parent stop-words", false, eentry.BOOLEAN));
}
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
public static serverObjects respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) {
final servletProperties prop = new servletProperties();
final Switchboard sb = (Switchboard)env;
@ -131,7 +134,7 @@ public class CrawlProfileEditor_p {
// generate handle list: first sort by handle name
CrawlProfile selentry;
Map<String, String> orderdHandles = new TreeMap<String, String>();
final Map<String, String> orderdHandles = new TreeMap<String, String>();
for (final byte[] h : sb.crawler.getActive()) {
selentry = sb.crawler.getActive(h);
if (selentry != null && !ignoreNames.contains(selentry.name())) {
@ -219,7 +222,8 @@ public class CrawlProfileEditor_p {
prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_label", ee.label);
prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type", ee.type);
if (ee.type == eentry.BOOLEAN) {
prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type_checked", Boolean.parseBoolean(val) ? "1" : "0");
prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type_checked",
Boolean.parseBoolean(val) ? "1" : "0");
} else {
prop.put(EDIT_ENTRIES_PREFIX + count + "_readonly_type_value", val);
}
@ -231,7 +235,14 @@ public class CrawlProfileEditor_p {
return prop;
}
private static void putProfileEntry(final servletProperties prop, final CrawlStacker crawlStacker, final CrawlProfile profile, final boolean active, final boolean dark, final int count, final int domlistlength) {
private static void putProfileEntry(
final servletProperties prop,
final CrawlStacker crawlStacker,
final CrawlProfile profile,
final boolean active,
final boolean dark,
final int count,
final int domlistlength) {
prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_name", profile.name());
@ -247,13 +258,13 @@ public class CrawlProfileEditor_p {
prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingIfOlder", (profile.recrawlIfOlder() == 0L) ? "no re-crawl" : DateFormat.getDateTimeInstance().format(profile.recrawlIfOlder()));
prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterDepth", "inactive");
// start contrib [MN]
int i = 0;
if (active && profile.domMaxPages() > 0 && profile.domMaxPages() != Integer.MAX_VALUE) {
if (active && profile.domMaxPages() > 0
&& profile.domMaxPages() != Integer.MAX_VALUE) {
String item;
while (i <= domlistlength && !"".equals(item = crawlStacker.domName(true, i))){
while (i <= domlistlength && !(item = crawlStacker.domName(true, i)).isEmpty()){
if (i == domlistlength) {
item = item + " ...";
item += " ...";
}
prop.putHTML(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterContent_" + i + "_item", item);
i++;

CrawlProfile.java

@ -4,7 +4,10 @@
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 31.08.2010
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -63,6 +66,26 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
private Pattern mustmatch = null, mustnotmatch = null;
/**
* Constructor which creates a CrawlProfile from parameters.
* @param name name of the crawl profile
* @param startURL root URL of the crawl
* @param mustmatch URLs which do not match this regex will be ignored
* @param mustnotmatch URLs which match this regex will be ignored
* @param depth height of the tree which will be created by the crawler
* @param recrawlIfOlder documents which have been indexed in the past will
* be indexed again if they are older than the time (ms) in this parameter
* @param domMaxPages maximum number of pages from one domain which will be indexed
* @param crawlingQ true if URLs containing question marks shall be indexed
* @param indexText true if text content of URL shall be indexed
* @param indexMedia true if media content of URL shall be indexed
* @param storeHTCache true if content shall be kept in cache after indexing
* @param remoteIndexing true if part of the crawl job shall be distributed
* @param xsstopw true if static stop words shall be ignored
* @param xdstopw true if dynamic stop words shall be ignored
* @param xpstopw true if parent stop words shall be ignored
* @param cacheStrategy determines if and how the cache is used when loading content
*/
public CrawlProfile(
final String name,
final DigestURI startURL,
@ -81,8 +104,12 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
final boolean xpstopw,
final CacheStrategy cacheStrategy) {
super(40);
if (name == null || name.length() == 0) throw new NullPointerException("name must not be null");
final String handle = (startURL == null) ? Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength) : ASCII.String(startURL.hash());
if (name == null || name.isEmpty()) {
throw new NullPointerException("name must not be null or empty");
}
final String handle = (startURL == null)
? Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength)
: ASCII.String(startURL.hash());
put(HANDLE, handle);
put(NAME, name);
put(START_URL, (startURL == null) ? "" : startURL.toNormalform(true, false));
@ -102,37 +129,75 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
put(CACHE_STRAGEGY, cacheStrategy.toString());
}
/**
* Constructor which creates a CrawlProfile from values in a Map.
* @param ext contains values
*/
public CrawlProfile(final Map<String, String> ext) {
super(ext == null ? 1 : ext.size());
if (ext != null) putAll(ext);
}
public void put(final String key, final boolean value) {
/**
* Adds a parameter to CrawlProfile.
* @param key name of the parameter
* @param value value of the parameter
*/
public final void put(final String key, final boolean value) {
super.put(key, Boolean.toString(value));
}
public void put(final String key, final int value) {
/**
* Adds a parameter to CrawlProfile.
* @param key name of the parameter
* @param value value of the parameter
*/
public final void put(final String key, final int value) {
super.put(key, Integer.toString(value));
}
public void put(final String key, final long value) {
/**
* Adds a parameter to CrawlProfile.
* @param key name of the parameter
* @param value value of the parameter
*/
public final void put(final String key, final long value) {
super.put(key, Long.toString(value));
}
/**
* Gets handle of the CrawlProfile.
* @return handle of the profile
*/
public String handle() {
final String r = get(HANDLE);
//if (r == null) return null;
return r;
}
/**
* Gets the name of the CrawlProfile.
* @return name of the profile
*/
public String name() {
final String r = get(NAME);
if (r == null) return "";
return r;
}
/**
* Gets the root URL of the crawl job.
* @return root URL
*/
public String startURL() {
final String r = get(START_URL);
return r;
}
/**
* Gets the regex which must be matched by URLs in order to be crawled.
* @return regex which must be matched
*/
public Pattern mustMatchPattern() {
if (this.mustmatch == null) {
String r = get(FILTER_MUSTMATCH);
@ -141,6 +206,11 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
}
return this.mustmatch;
}
/**
* Gets the regex which must not be matched by URLs in order to be crawled.
* @return regex which must not be matched
*/
public Pattern mustNotMatchPattern() {
if (this.mustnotmatch == null) {
String r = get(FILTER_MUSTNOTMATCH);
@ -149,6 +219,12 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
}
return this.mustnotmatch;
}
/**
* Gets depth of crawl job (or height of the tree which will be
* created by the crawler).
* @return depth of crawl job
*/
public int depth() {
final String r = get(DEPTH);
if (r == null) return 0;
@ -159,6 +235,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return 0;
}
}
public CacheStrategy cacheStrategy() {
final String r = get(CACHE_STRAGEGY);
if (r == null) return CacheStrategy.IFEXIST;
@ -169,9 +246,15 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return CacheStrategy.IFEXIST;
}
}
public void setCacheStrategy(final CacheStrategy newStrategy) {
put(CACHE_STRAGEGY, newStrategy.toString());
}
/**
* Gets the minimum age that an entry must have to be re-crawled.
* @return time in ms
*/
public long recrawlIfOlder() {
// returns a long (millis) that is the minimum age that
// an entry must have to be re-crawled
@ -185,6 +268,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return 0L;
}
}
public int domMaxPages() {
// this is the maximum number of pages that are crawled for a single domain
// if -1, this means no limit
@ -199,26 +283,31 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return Integer.MAX_VALUE;
}
}
public boolean crawlingQ() {
final String r = get(CRAWLING_Q);
if (r == null) return false;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean pushSolr() {
final String r = get(PUSH_SOLR);
if (r == null) return true;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean indexText() {
final String r = get(INDEX_TEXT);
if (r == null) return true;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean indexMedia() {
final String r = get(INDEX_MEDIA);
if (r == null) return true;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean storeHTCache() {
final String r = get(STORE_HTCACHE);
if (r == null) return false;
@ -229,16 +318,19 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
if (r == null) return false;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean excludeStaticStopwords() {
final String r = get(XSSTOPW);
if (r == null) return false;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean excludeDynamicStopwords() {
final String r = get(XDSTOPW);
if (r == null) return false;
return (r.equals(Boolean.TRUE.toString()));
}
public boolean excludeParentStopwords() {
final String r = get(XPSTOPW);
if (r == null) return false;
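A small usage sketch for the typed put() overloads and getters above. Values are kept as strings in the underlying map and parsed back with safe defaults; the snippet assumes the key constants (CrawlProfile.CRAWLING_Q, CrawlProfile.DEPTH) are public, as the editor servlet's use of CrawlProfile.XPSTOPW suggests:

// Sketch only: round-trip through the string-backed map.
final CrawlProfile p = new CrawlProfile(new java.util.HashMap<String, String>());
p.put(CrawlProfile.CRAWLING_Q, true);   // stored as "true"
p.put(CrawlProfile.DEPTH, 3);           // stored as "3"
assert p.crawlingQ();                   // compared against Boolean.TRUE.toString()
assert p.depth() == 3;                  // parsed back from the stored string
assert "".equals(p.name());             // NAME was never set, name() falls back to ""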

CrawlSwitchboard.java

@ -39,6 +39,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
import net.yacy.repository.RegexHelper;
public final class CrawlSwitchboard {
@ -52,6 +53,7 @@ public final class CrawlSwitchboard {
public static final String DBFILE_ACTIVE_CRAWL_PROFILES = "crawlProfilesActive.heap";
public static final String DBFILE_PASSIVE_CRAWL_PROFILES = "crawlProfilesPassive.heap";
public static final String DBFILE_INVALID_CRAWL_PROFILES = "crawlProfilesInvalid.heap";
public static final long CRAWL_PROFILE_PROXY_RECRAWL_CYCLE = 60L * 24L;
public static final long CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE = 60L * 24L * 30L;
@ -61,7 +63,7 @@ public final class CrawlSwitchboard {
public static final long CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE = 60L * 24L * 30L;
private final Log log;
private Map<byte[], Map<String, String>> profilesActiveCrawls, profilesPassiveCrawls;
private Map<byte[], Map<String, String>> profilesActiveCrawls, profilesPassiveCrawls, profilesInvalidCrawls;
public CrawlProfile defaultProxyProfile;
public CrawlProfile defaultRemoteProfile;
public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
@ -87,40 +89,37 @@ public final class CrawlSwitchboard {
this.queuesRoot.mkdirs();
this.log.logConfig("Initializing Crawl Profiles");
final File profilesInvalidFile = new File(queuesRoot, DBFILE_INVALID_CRAWL_PROFILES);
this.profilesInvalidCrawls = loadFromDB(profilesInvalidFile);
final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
try {
this.profilesActiveCrawls = new MapHeap(profilesActiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch (final IOException e) {
Log.logException(e);
FileUtils.deletedelete(profilesActiveFile);
try {
this.profilesActiveCrawls = new MapHeap(profilesActiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch (final IOException e1) {
Log.logException(e1);
this.profilesActiveCrawls = null;
this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
final CrawlProfile p;
p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_MUSTMATCH))) {
this.removeActive(handle);
this.putInvalid(handle, p);
Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
+ " from active crawls since " + CrawlProfile.FILTER_MUSTMATCH
+ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_MUSTMATCH));
} else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_MUSTNOTMATCH))) {
this.putInvalid(handle, p);
this.removeActive(handle);
Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
+ " from active crawls since " + CrawlProfile.FILTER_MUSTNOTMATCH
+ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_MUSTNOTMATCH));
} else {
Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
}
}
for (final byte[] handle: this.profilesActiveCrawls.keySet()) {
final CrawlProfile p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
}
initActiveCrawlProfiles();
log.logInfo("Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries");
final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
try {
this.profilesPassiveCrawls = new MapHeap(profilesPassiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch (final IOException e) {
Log.logException(e);
FileUtils.deletedelete(profilesActiveFile);
try {
this.profilesPassiveCrawls = new MapHeap(profilesPassiveFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch (final IOException e1) {
Log.logException(e1);
this.profilesPassiveCrawls = null;
}
}
for (final byte[] handle: this.profilesPassiveCrawls.keySet()) {
this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
for (final byte[] handle : this.profilesPassiveCrawls.keySet()) {
final CrawlProfile p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
}
@ -135,6 +134,13 @@ public final class CrawlSwitchboard {
if (m == null) return null;
return new CrawlProfile(m);
}
public CrawlProfile getInvalid(final byte[] profileKey) {
if (profileKey == null) return null;
final Map<String, String> m = this.profilesInvalidCrawls.get(profileKey);
if (m == null) return null;
return new CrawlProfile(m);
}
public CrawlProfile getPassive(final byte[] profileKey) {
if (profileKey == null) return null;
@ -146,6 +152,10 @@ public final class CrawlSwitchboard {
public Set<byte[]> getActive() {
return this.profilesActiveCrawls.keySet();
}
public Set<byte[]> getInvalid() {
return this.profilesInvalidCrawls.keySet();
}
public Set<byte[]> getPassive() {
return this.profilesPassiveCrawls.keySet();
@ -155,6 +165,11 @@ public final class CrawlSwitchboard {
if (profileKey == null) return;
this.profilesActiveCrawls.remove(profileKey);
}
public void removeInvalid(final byte[] profileKey) {
if (profileKey == null) return;
this.profilesInvalidCrawls.remove(profileKey);
}
public void removePassive(final byte[] profileKey) {
if (profileKey == null) return;
@ -164,6 +179,10 @@ public final class CrawlSwitchboard {
public void putActive(final byte[] profileKey, final CrawlProfile profile) {
this.profilesActiveCrawls.put(profileKey, profile);
}
public void putInvalid(final byte[] profileKey, final CrawlProfile profile) {
this.profilesInvalidCrawls.put(profileKey, profile);
}
public void putPassive(final byte[] profileKey, final CrawlProfile profile) {
this.profilesPassiveCrawls.put(profileKey, profile);
@ -302,7 +321,31 @@ public final class CrawlSwitchboard {
public void close() {
((MapHeap) this.profilesActiveCrawls).close();
((MapHeap) this.profilesInvalidCrawls).close();
((MapHeap) this.profilesPassiveCrawls).close();
}
/**
* Loads crawl profiles from a DB file.
* @param file DB file
* @return crawl profile data
*/
private Map<byte[], Map<String, String>> loadFromDB(final File file) {
Map<byte[], Map<String, String>> ret;
try {
ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch (final IOException e) {
Log.logException(e);
FileUtils.deletedelete(file);
try {
ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch (final IOException e1) {
Log.logException(e1);
ret = null;
}
}
return ret;
}
}

CacheStrategy.java

@ -1,11 +1,43 @@
// CacheStrategy.java
// ------------------------
// part of YaCy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2011
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.services.federated.yacy;
public enum CacheStrategy {
NOCACHE(0), // never use the cache, all content from fresh internet source
IFFRESH(1), // use the cache if the cache exists and is fresh using the proxy-fresh rules
IFEXIST(2), // use the cache if the cache exist. Do no check freshness. Otherwise use online source.
CACHEONLY(3); // never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available
/** Never use the cache, all content from fresh internet source. */
NOCACHE(0),
/** Use the cache if the cache exists and is fresh using the
* proxy-fresh rules. */
IFFRESH(1),
/** Use the cache if the cache exists. Do not check freshness. Otherwise
* use online source. */
IFEXIST(2),
/** Never go online, use all content from cache. If no cache entry exists,
* consider content nevertheless as available */
CACHEONLY(3);
// the fifth case may be that the CacheStrategy object is assigned NULL. That means that no snippet creation is wanted.
public int code;
@ -14,6 +46,7 @@ public enum CacheStrategy {
this.code = code;
}
@Override
public String toString() {
return Integer.toString(this.code);
}
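Since toString() serializes a strategy as its numeric code (this is also what CrawlProfile stores under CACHE_STRAGEGY), reading a stored value back means mapping the code to a constant again. A minimal sketch of such a lookup, with class and method names of our own choosing (the diff does not show how CrawlProfile.cacheStrategy() really performs it):

import net.yacy.cora.services.federated.yacy.CacheStrategy;

public final class CacheStrategyCodes {
    // Sketch only: map a numeric code back to its constant, falling back to
    // IFEXIST just as CrawlProfile.cacheStrategy() does for unknown values.
    public static CacheStrategy fromCode(final int code) {
        for (final CacheStrategy s : CacheStrategy.values()) {
            if (s.code == code) return s;
        }
        return CacheStrategy.IFEXIST;
    }
}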

Blacklist.java

@ -444,7 +444,7 @@ public class Blacklist {
path = element.substring(slashPos + 1);
}
if (!allowRegex || !isValidRegex(host)) {
if (!allowRegex || !RegexHelper.isValidRegex(host)) {
final int i = host.indexOf('*');
// check whether host begins illegally
@ -470,33 +470,18 @@ public class Blacklist {
if (host.indexOf("*", i + 1) > -1) {
return BlacklistError.TWO_WILDCARDS_IN_HOST;
}
} else if (allowRegex && !isValidRegex(host)) {
} else if (allowRegex && !RegexHelper.isValidRegex(host)) {
return BlacklistError.HOST_REGEX;
}
// check for errors on regex-compiling path
if (!isValidRegex(path) && !"*".equals(path)) {
if (!RegexHelper.isValidRegex(path) && !"*".equals(path)) {
return BlacklistError.PATH_REGEX;
}
return BlacklistError.NO_ERROR;
}
/**
* Checks if a given expression is a valid regular expression.
* @param expression The expression to be checked.
* @return True if the expression is a valid regular expression, else false.
*/
private static boolean isValidRegex(final String expression) {
boolean ret = true;
try {
Pattern.compile(expression);
} catch (final PatternSyntaxException e) {
ret = false;
}
return ret;
}
public static String defaultBlacklist(final File listsPath) {
final List<String> dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
if (dirlist.isEmpty()) {

FilterEngine.java

@ -45,6 +45,7 @@ public class FilterEngine {
this.types = types;
}
@Override
public int compareTo(FilterEntry fe) {
return this.path.compareToIgnoreCase(fe.path);
}
@ -229,7 +230,7 @@ public class FilterEngine {
path = element.substring(slashPos + 1);
}
if (!allowRegex || !isValidRegex(host)) {
if (!allowRegex || !RegexHelper.isValidRegex(host)) {
final int i = host.indexOf('*');
// check whether host begins illegally
@ -255,33 +256,16 @@ public class FilterEngine {
if (host.indexOf("*", i + 1) > -1) {
return ERR_TWO_WILDCARDS_IN_HOST;
}
} else if (allowRegex && !isValidRegex(host)) {
} else if (allowRegex && !RegexHelper.isValidRegex(host)) {
return ERR_HOST_REGEX;
}
// check for errors on regex-compiling path
if (!isValidRegex(path) && !path.equals("*")) {
if (!RegexHelper.isValidRegex(path) && !path.equals("*")) {
return ERR_PATH_REGEX;
}
return 0;
}
/**
* Checks if a given expression is a valid regular expression.
* @param expression The expression to be checked.
* @return True if the expression is a valid regular expression, else false.
*/
private static boolean isValidRegex(String expression) {
boolean ret = true;
try {
Pattern.compile(expression);
} catch (final PatternSyntaxException e) {
ret = false;
}
return ret;
}
}

RegexHelper.java (new file)

@ -0,0 +1,52 @@
// RegexHelper.java
// ------------------------
// part of YaCy
// (C) by Marc Nause; marc.nause@gmx.de
// first published on http://www.anomic.de
// Braunschweig, Germany, 2011
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.repository;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
public final class RegexHelper {
/** Private constructor to avoid instantiation of static class. */
private RegexHelper() { }
/**
* Checks if a given expression is a valid regular expression.
* @param expression expression to be checked
* @return true if the expression is a valid regular expression, else false
*/
public static boolean isValidRegex(final String expression) {
boolean ret = true;
try {
Pattern.compile(expression);
} catch (final PatternSyntaxException e) {
ret = false;
}
return ret;
}
}
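Finally, a short usage sketch for the new helper: it replaces the private isValidRegex() copies removed from Blacklist and FilterEngine above, and is what CrawlSwitchboard now uses to validate the mustmatch/mustnotmatch filters of a crawl profile. The demo class is not part of the commit:

import net.yacy.repository.RegexHelper;

public class RegexHelperDemo {
    public static void main(final String[] args) {
        System.out.println(RegexHelper.isValidRegex(".*\\.example\\.org/.*")); // true
        System.out.println(RegexHelper.isValidRegex("*[unclosed"));            // false, pattern does not compile
    }
}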