Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

This commit is contained in:
Michael Peter Christen 2014-07-07 11:11:50 +02:00
commit f384fd624b
20 changed files with 233 additions and 52 deletions

View File

@ -439,7 +439,7 @@
<target name="compileTest" depends="compile" description="run unittests">
<javac srcdir="${test}" destdir="${test}"
debug="true" debuglevel="lines,vars,source"
source="${javacSource}" target="${javacTarget}">
source="${javacSource}" target="${javacTarget}" encoding="UTF-8">
<classpath>
<pathelement location="${build}"/>
<pathelement location="${htroot}"/>

View File

@ -54,16 +54,16 @@ public final class timeline_p {
// get type of data to be listed in the timeline
int maxeventsperperiod = post.getInt("head", 1); // the maximum number of events per period
String period = post.get("period", ""); // must be an integer with a character c at the end, c = Y|M|d|h|m|s
int periodlength = 0;
long periodlength = 0;
if (period.length() > 0) {
char c = period.charAt(period.length() - 1);
int p = Integer.parseInt(period.substring(0, period.length() - 1));
if (c == 's') periodlength = p * 1000;
else if (c == 'm') periodlength = p * 1000 * 60;
else if (c == 'h') periodlength = p * 1000 * 60 * 60;
else if (c == 'd') periodlength = p * 1000 * 60 * 60 * 24;
else if (c == 'M') periodlength = p * 1000 * 60 * 60 * 24 * 30;
else if (c == 'Y') periodlength = p * 1000 * 60 * 60 * 24 * 365;
long p = Long.parseLong(period.substring(0, period.length() - 1));
if (c == 's') periodlength = p * 1000L;
else if (c == 'm') periodlength = p * 1000L * 60L;
else if (c == 'h') periodlength = p * 1000L * 60L * 60L;
else if (c == 'd') periodlength = p * 1000L * 60L * 60L * 24L;
else if (c == 'M') periodlength = p * 1000L * 60L * 60L * 24L * 30L;
else if (c == 'Y' || c == 'y') periodlength = p * 1000L * 60L * 60L * 24L * 365L;
else periodlength = 0;
}
final String[] data = post.get("data", "").split(","); // a string of word hashes that shall be searched and combined
@ -76,6 +76,9 @@ public final class timeline_p {
try {fromDate = GenericFormatter.SHORT_SECOND_FORMATTER.parse(post.get("from", "20031215182700"));} catch (ParseException e) {}
try {toDate = GenericFormatter.SHORT_SECOND_FORMATTER.parse(post.get("to", GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date())));} catch (ParseException e) {}
// get latest dump;
AccessTracker.dumpLog();
// fill proc with events from the given data and time period
if (proc.containsKey("queries")) {
List<EventTracker.Event> events = AccessTracker.readLog(AccessTracker.getDumpFile(), fromDate, toDate);
@ -131,7 +134,7 @@ public final class timeline_p {
return prop;
}
private static void stats(OrderedScoreMap<String> accumulation, List<EventTracker.Event> eap, long startDate, int periodlength, int head, String type) {
private static void stats(OrderedScoreMap<String> accumulation, List<EventTracker.Event> eap, long startDate, long periodlength, int head, String type) {
// write accumulation of the score map into eap
Iterator<String> si = accumulation.keys(false);
int c = 0;

View File

@ -15,6 +15,7 @@
#(compareyacy)#
<Url type="text/html" method="GET" template="http://#[thisaddress]#/yacysearch.html?query={searchTerms}&amp;startRecord={startIndex?}&amp;maximumRecords={count?}&amp;nav=all&amp;resource=global" />
<Url type="application/rss+xml" method="GET" template="http://#[thisaddress]#/yacysearch.rss?nav=&amp;query={searchTerms}&amp;startRecord={startIndex?}&amp;maximumRecords={count?}&amp;nav=all&amp;resource=global" />
<Url type="application/atom+xml" method="GET" template="http://#[thisaddress]#/yacysearch.atom?query={searchTerms}&amp;startRecord={startIndex?}&amp;maximumRecords={count?}&amp;resource=global" />
::
<Url type="text/html" method="GET" template="http://#[thisaddress]#/compare_yacy.html?query={searchTerms}&amp;left=#[search_left]#&amp;right=#[search_right]#&amp;display=2" />
#(/compareyacy)#

21
htroot/yacysearch.atom Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom"
      xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
>
  <!-- YaCy Search Engine; http://yacy.net -->
  <title>#[promoteSearchPageGreeting]#: #[rss_query]#</title>
  <!-- Atom has no feed-level "description" element; "subtitle" is the Atom equivalent -->
  <subtitle>Search for #[rss_query]#</subtitle>
  <!-- Atom requires a feed id; the URL of this search is used as the identifier -->
  <id>#[searchBaseURL]#?query=#[rss_queryenc]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;verify=#[search.verify]#</id>
  <!-- Atom links are empty elements that carry the target in the href attribute -->
  <link rel="alternate" type="text/html" href="#[searchBaseURL]#?query=#[rss_queryenc]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;verify=#[search.verify]#" />
  <!-- NOTE(review): Atom also requires a feed-level "updated" element; no timestamp placeholder is visible in this template, so none is emitted here. TODO confirm and add -->
  <logo>#[rssYacyImageURL]#</logo>
  <opensearch:startIndex>#[num-results_offset]#</opensearch:startIndex>
  <opensearch:itemsPerPage>#[num-results_itemsPerPage]#</opensearch:itemsPerPage>
  <link rel="search" href="http://#[thisaddress]#/opensearchdescription.xml" type="application/opensearchdescription+xml"/>
  <opensearch:Query role="request" searchTerms="#[rss_queryenc]#" />
#{results}#
<!--#include virtual="yacysearchitem.atom?item=#[item]#&eventID=#[eventID]#" -->
#{/results}#
</feed>

View File

@ -150,7 +150,7 @@ public class yacysearch {
hostName += ":" + env.getConfig("port", "8090");
}
prop.put("searchBaseURL", "http://" + hostName + "/yacysearch.html");
prop.put("rssYacyImageURL", "http://" + hostName + "/env/grafics/yacy.gif");
prop.put("rssYacyImageURL", "http://" + hostName + "/env/grafics/yacy.png");
prop.put("thisaddress", hostName);
final boolean clustersearch = sb.isRobinsonMode() && sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER);
final boolean indexReceiveGranted = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, true) || clustersearch;

View File

@ -0,0 +1,11 @@
#(content)#::<entry>
<title type="html">#[title-xml]#</title>
<link href="#[link]#" />
<summary type="html">#[description-xml]#</summary>
<updated>#[date822]#</updated>
<dc:publisher><![CDATA[#[publisher]#]]></dc:publisher>
<author><name><![CDATA[#[creator]#]]></name></author>
<dc:subject><![CDATA[#[subject]#]]></dc:subject>
<id>#[urlhash]#</id>
</entry>
#(/content)#

BIN
libt/hamcrest-core-1.3.jar Normal file

Binary file not shown.

BIN
libt/junit-4.11.jar Normal file

Binary file not shown.

Binary file not shown.

14
pom.xml
View File

@ -99,6 +99,18 @@
</configuration>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
<configuration>
<filesets>
<fileset>
<directory>test/DATA</directory>
</fileset>
</filesets>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
@ -279,7 +291,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.7</version>
<version>4.11</version>
<scope>test</scope>
</dependency>

View File

@ -222,22 +222,16 @@ public final class Condenser {
document.addMetatags(this.tags);
}
// create the synonyms set
if (synlib != null && synlib.size() > 0) {
for (String word: this.words.keySet()) {
Set<String> syms = synlib.getSynonyms(word);
if (syms != null) this.synonyms.addAll(syms);
}
}
String text = document.getTextString();
// create the synonyms set
if (synonyms != null && synlib.size() > 0) {
for (String word: this.words.keySet()) {
Set<String> syms = synlib.getSynonyms(word);
if (syms != null) this.synonyms.addAll(syms);
}
}
// create hashes for duplicate detection
// check dups with http://localhost:8090/solr/select?q=*:*&start=0&rows=3&fl=sku,fuzzy_signature_text_t,fuzzy_signature_l,fuzzy_signature_unique_b
EnhancedTextProfileSignature fuzzySignatureFactory = new EnhancedTextProfileSignature();

View File

@ -116,7 +116,7 @@ public final class Records {
this.buffer = new byte[buffersize];
this.buffercount = 0;
}
public void clear() {
try {
this.raf.setLength(0);
@ -399,10 +399,14 @@ public final class Records {
return;
}
// read entry from the file
final long endpos = this.raf.length() - this.recordsize;
this.raf.seek(endpos);
this.raf.readFully(b, start, this.recordsize);
long endpos = this.raf.length() - this.recordsize;
if (endpos >= 0) { // prevent seek error for 0 size file
this.raf.seek(endpos);
this.raf.readFully(b, start, this.recordsize);
} else {
endpos = 0;
System.arraycopy(this.zero, 0, b, start, this.recordsize);
}
// write zero bytes to the cache and to the file
this.raf.seek(endpos);
this.raf.write(this.zero, 0, this.recordsize);
@ -434,7 +438,8 @@ public final class Records {
return;
}
// shrink file
this.raf.setLength(this.raf.length() - this.recordsize);
if (this.raf.length() > 0) // already 0 length, nothing to shrink (prevent seek io error)
this.raf.setLength(this.raf.length() - this.recordsize);
}
public final void deleteOnExit() {

View File

@ -7,7 +7,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -39,22 +39,23 @@ import net.yacy.cora.util.LookAheadIterator;
public class ChunkIterator extends LookAheadIterator<byte[]> implements Iterator<byte[]> {
private final int chunksize;
/**
* create a ChunkIterator
* a ChunkIterator uses a BufferedInputStream to iterate through the file
* and is therefore a fast option to get all elements in the file as a sequence
* ATTENTION: before calling this class ensure that all file buffers are flushed
* ATTENTION: if the iterator is not read to the end or interrupted, close() must be called to release the InputStream
* @param file: the file
* @param recordsize: the size of the elements in the file
* @param chunksize: the size of the chunks that are returned by next(); the remaining bytes of each record, up to the length of recordsize, are skipped
* @throws FileNotFoundException
* @throws FileNotFoundException
*/
private final DataInputStream stream;
private final int recordsize;
public ChunkIterator(final File file, final int recordsize, final int chunksize) throws FileNotFoundException {
if (!file.exists()) throw new FileNotFoundException(file.getAbsolutePath());
assert (file.exists());
@ -63,7 +64,16 @@ public class ChunkIterator extends LookAheadIterator<byte[]> implements Iterator
this.chunksize = chunksize;
this.stream = new DataInputStream(new BufferedInputStream(new FileInputStream(file), 64 * 1024));
}
/**
* Closes the InputStream used by this iterator.
* The stream is closed automatically when next0() reaches the end of the file,
* so close() only needs to be called if the iterator was not read to the end
* (i.e. iteration was stopped before hasNext()/next() signalled exhaustion).
* @throws IOException if closing the stream fails
*/
public void close() throws IOException {
this.stream.close();
}
@Override
public byte[] next0() {
final byte[] chunk = new byte[chunksize];
@ -82,6 +92,11 @@ public class ChunkIterator extends LookAheadIterator<byte[]> implements Iterator
return chunk;
} catch (final EOFException e) {
// no real exception, this is the normal termination
try {
this.stream.close(); // close the underlaying inputstream
} catch (IOException ex) {
ConcurrentLog.logException(ex);
}
return null;
} catch (final IOException e) {
ConcurrentLog.logException(e);

View File

@ -173,7 +173,7 @@ public class Table implements Index, Iterable<Row.Entry> {
} else {
byte[] record;
key = new byte[rowdef.primaryKeyLength];
final Iterator<byte[]> ri = new ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize);
final ChunkIterator ri = new ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize);
while (ri.hasNext()) {
record = ri.next();
assert record != null;
@ -188,6 +188,7 @@ public class Table implements Index, Iterable<Row.Entry> {
this.table.addUnique(this.taildef.newEntry(record, rowdef.primaryKeyLength, true));
} catch (final SpaceExceededException e) {
this.table = null;
ri.close(); // close inputstream of chunkiterator
break;
}
} else {

View File

@ -131,17 +131,17 @@ public class EventTracker {
public final static class Event {
final private Object time; // either a String in SHORT_SECOND format, a Long with ms since epoch or Date;
final public int duration; // ms
final public long duration; // ms
final public String type;
final public Object payload;
final public int count;
public Event(final Date time, final int duration, final String type, final Object payload, final int count) {
public Event(final Date time, final long duration, final String type, final Object payload, final int count) {
this.time = time; this.duration = duration; this.type = type; this.payload = payload; this.count = count;
}
public Event(final Long time, final int duration, final String type, final Object payload, final int count) {
public Event(final Long time, final long duration, final String type, final Object payload, final int count) {
this.time = time; this.duration = duration; this.type = type; this.payload = payload; this.count = count;
}
public Event(final String time, final int duration, final String type, final Object payload, final int count) {
public Event(final String time, final long duration, final String type, final Object payload, final int count) {
this.time = time; this.duration = duration; this.type = type; this.payload = payload; this.count = count;
}
public String getFormattedDate() {

View File

@ -38,7 +38,6 @@ import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.WordCache;
@ -235,13 +234,22 @@ public class AccessTracker {
ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
BufferedReader reader = new BufferedReader(new InputStreamReader(bais, "UTF-8"));
String line;
Pattern sp = Pattern.compile(" ");
while ((line = reader.readLine()) != null) {
// parse the line
String[] ls = sp.split(line);
if (line.length() < GenericFormatter.PATTERN_SHORT_SECOND.length() + 3 ||
line.charAt(GenericFormatter.PATTERN_SHORT_SECOND.length()) != ' ') continue;
String dateStr = line.substring(0, GenericFormatter.PATTERN_SHORT_SECOND.length());
int countEnd = -1;
for (int i = GenericFormatter.PATTERN_SHORT_SECOND.length() + 2; i < line.length(); i++) {
if (line.charAt(i) == ' ') { countEnd = i; break; }
}
if (countEnd == -1) continue;
String countStr = line.substring(GenericFormatter.PATTERN_SHORT_SECOND.length() + 1, countEnd);
if (countStr.length() > 5) continue;
int hits = countStr.length() == 1 ? (countStr.charAt(0)) - 48 : Integer.parseInt(countStr);
EventTracker.Event event;
if (ls.length > 1) try {
event = new EventTracker.Event(ls[0], 0, "query", line.substring(ls[0].length() + ls[1].length() + 2), Integer.valueOf(ls[1]));
try {
event = new EventTracker.Event(dateStr, 0, "query", line.substring(dateStr.length() + countStr.length() + 2), hits);
events.add(event);
} catch (NumberFormatException e) {
continue;

View File

@ -0,0 +1,62 @@
package net.yacy.crawler;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.retrieval.Request;
import static org.junit.Assert.*;
import org.junit.Test;
/**
 * Tests for HostQueue.
 *
 * The directory layout used by the queue is:
 *
 * stackDir (dir)
 *   +-- hostDir (dir)
 *         +-- crawldepth.stack (file)
 */
public class HostQueueTest {

    final String stackDir = "test/DATA/INDEX/QUEUE/CrawlerCoreStacks";

    /**
     * Test of clear method, of class HostQueue.
     *
     * Pushes one request into a fresh queue, clears the queue and verifies that
     * the queue reports size 0 and that no host files remain in stackDir, both
     * before and after the queue is closed.
     */
    @Test
    public void testClear() throws MalformedURLException, IOException, SpaceExceededException {
        File stackDirFile = new File(stackDir);
        String hostDir = "a.com";
        int hostPort = 80;

        // open queue
        HostQueue testhq = new HostQueue(stackDirFile, hostDir, hostPort, true, true);
        try {
            // add a url
            String urlstr = "http://" + hostDir + "/test.html";
            DigestURL url = new DigestURL(urlstr);
            Request req = new Request(url, null);
            testhq.push(req, null, null);

            int sizeA = testhq.size();
            assertTrue(sizeA > 0);

            testhq.clear(); // clear the complete host queue (should delete all files in stackDir)
            int sizeB = testhq.size();
            assertEquals(0, sizeB);

            // verify stackDir empty (double check)
            // File.list() returns null if the path is not a listable directory; fail with a clear message instead of an NPE
            String[] filelist = stackDirFile.list();
            assertNotNull("queue dir not listable: " + stackDirFile, filelist);
            assertEquals("host files in queue dir", 0, filelist.length);
        } finally {
            // always release the queue, even if one of the assertions above failed
            testhq.close();
        }

        // verify stackDir is still empty after the queue has been closed
        String[] filelist = stackDirFile.list();
        assertNotNull("queue dir not listable: " + stackDirFile, filelist);
        assertEquals("host files in queue dir", 0, filelist.length);
    }
}

View File

@ -1,8 +1,5 @@
package net.yacy.document;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.containsString;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@ -16,7 +13,8 @@ import net.yacy.document.parser.docParser;
import net.yacy.document.parser.odtParser;
import net.yacy.document.parser.ooxmlParser;
import net.yacy.document.parser.pdfParser;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertThat;
import org.junit.Test;
@ -88,7 +86,7 @@ public class ParserTest {
} catch (final InterruptedException ex) {}
}
}
@Test public void testpdfParsers() throws FileNotFoundException, Parser.Failure, MalformedURLException, UnsupportedEncodingException, IOException {
final String[][] testFiles = new String[][] {
// meaning: filename in test/parsertest, mimetype, title, creator, description,
@ -110,7 +108,7 @@ public class ParserTest {
int c;
while( (c = content.read()) != -1 )
str.append((char)c);
System.out.println("Parsed " + filename + ": " + str);
assertThat(str.toString(), containsString("In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen"));
assertThat(doc.dc_title(), containsString(testFile[2]));

View File

@ -6,8 +6,6 @@ import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.List;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
import junit.framework.TestCase;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.document.Document;

View File

@ -0,0 +1,52 @@
package net.yacy.kelondro.io;
import java.io.File;
import net.yacy.cora.document.encoding.ASCII;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
/**
 * Tests for Records: calling cleanLast more often than there are records
 * must leave an empty table instead of failing on a zero-length file.
 */
public class RecordsTest {

    final String tesDir = "test/DATA/INDEX/QUEUE";

    /**
     * Test of cleanLast(byte[], int) method, of class Records.
     *
     * Adds a single record and then calls cleanLast several times; the extra
     * calls operate on an already empty table.
     */
    @Test
    public void testCleanLast_byteArr_int() throws Exception {
        File tablefile = new File(tesDir, "test.stack");
        byte[] b = ASCII.getBytes("testDataString");
        Records rec = new Records(tablefile, b.length);
        try {
            rec.add(b, 0); // add some data
            for (int i = 0; i < 5; i++) { // multiple cleanLast
                rec.cleanLast(b, 0);
            }
            assertEquals(0, rec.size());
        } finally {
            // release the file even if an assertion or call above failed
            rec.close();
        }
    }

    /**
     * Test of cleanLast() method, of class Records.
     *
     * Adds a single record and then calls cleanLast several times; the extra
     * calls operate on an already empty table.
     */
    @Test
    public void testCleanLast() throws Exception {
        File tablefile = new File(tesDir, "test.stack");
        byte[] b = ASCII.getBytes("testdata");
        Records rec = new Records(tablefile, b.length);
        try {
            rec.add(b, 0); // add data
            for (int i = 0; i < 5; i++) { // multiple cleanLast
                rec.cleanLast();
            }
            assertEquals(0, rec.size());
        } finally {
            // release the file even if an assertion or call above failed
            rec.close();
        }
    }
}