memory protection for URLAnalysis

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5649 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-02-24 22:05:09 +00:00
parent 0f6fa804ff
commit 89d8e824ed
2 changed files with 33 additions and 3 deletions

View File

@ -77,10 +77,10 @@
<dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">Export Format</dt>
<dd>Only Domain <i>(superfast)</i>:
<dd>Only Domain:
<input type="radio" name="format" value="dom-text" />Plain Text List (domains only)&nbsp;&nbsp;
<input type="radio" name="format" value="dom-html" checked="checked" />HTML (domains as URLs, no title)<br />
Full URL List <i>(high IO)&nbsp;&nbsp;&nbsp;&nbsp;</i>:
Full URL List:
<input type="radio" name="format" value="url-text" />Plain Text List (URLs only)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input type="radio" name="format" value="url-html" />HTML (URLs with title)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input type="radio" name="format" value="url-rss" />XML (RSS)

View File

@ -51,6 +51,8 @@ public class URLAnalysis {
* processes to analyse URL lists
*/
private static final long cleanuplimit = 50 * 1024 * 1024;
public static yacyURL poison = null;
static {
try {
@ -77,7 +79,6 @@ public class URLAnalysis {
try {
url = in.take();
if (url == poison) break;
//System.out.println(url);
update(url.getHost().replaceAll("-", "\\.").split("\\."));
update(p.matcher(url.getPath()).replaceAll("/").split("/"));
} catch (InterruptedException e) {
@ -96,6 +97,30 @@ public class URLAnalysis {
}
}
/**
 * Prunes low-count entries from the given statistics map in two sweeps and
 * then asks the runtime to run the garbage collector.
 * <p>
 * Sweep one removes every entry whose count is exactly 1 and records the
 * smallest count seen among the entries it keeps. Sweep two removes every
 * entry whose count equals that recorded smallest value. If sweep one keeps
 * no entry, the recorded value stays at {@link Integer#MAX_VALUE}.
 *
 * @param stat the map from key to occurrence count; it is modified in place
 */
public static void cleanup(ConcurrentHashMap<String, Integer> stat) {
    int smallest = Integer.MAX_VALUE;

    // sweep one: drop the singletons, remember the lowest surviving count
    for (Iterator<Map.Entry<String, Integer>> sweep = stat.entrySet().iterator(); sweep.hasNext();) {
        final int occurrences = sweep.next().getValue().intValue();
        if (occurrences != 1) {
            smallest = Math.min(smallest, occurrences);
            continue;
        }
        sweep.remove();
    }

    // sweep two: drop everything that sits on the lowest surviving count
    for (Iterator<Map.Entry<String, Integer>> sweep = stat.entrySet().iterator(); sweep.hasNext();) {
        final int occurrences = sweep.next().getValue().intValue();
        if (occurrences == smallest) {
            sweep.remove();
        }
    }

    // explicit request to the JVM to collect the entries released above
    Runtime.getRuntime().gc();
}
public static void main(String[] args) {
String filename = args[0];
String analysis = filename + ".stats";
@ -133,6 +158,11 @@ public class URLAnalysis {
if (System.currentTimeMillis() - time > 1000) {
time = System.currentTimeMillis();
System.out.println("processed " + count + " urls, " + (MemoryControl.available() / 1024 / 1024) + " mb left, " + count * 1000L / (time - start) + " url/second");
if (MemoryControl.available() < cleanuplimit) {
System.out.println("starting cleanup, " + out.size() + " entries in statistic");
cleanup(out);
System.out.println("finished cleanup, " + out.size() + " entries in statistic left, " + (MemoryControl.available() / 1024 / 1024) + " mb left");
}
}
}
reader.close();