test case for HostBalancer issue in intranet mode

With the file:// protocol, 2 hostqueues access the same cache file concurrently:
http://mantis.tokeek.de/view.php?id=668
The reason seems to be a different hosthash key of the hostqueues on reopen.
Internal queue key and external representation (directoryname currently hostname.port) must be adjusted to fix it (not done yet).
This commit is contained in:
reger 2016-07-04 02:44:58 +02:00
parent 16420e5507
commit fcc29c36f0

View File

@ -0,0 +1,87 @@
package net.yacy.crawler;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.data.WorkTables;
import static net.yacy.kelondro.util.FileUtils.deletedelete;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Tests for {@link HostBalancer} using an on-disk queue directory below
 * {@code test/DATA}.
 */
public class HostBalancerTest {

    /** Root directory of the host queues created by the test. */
    final File queuesRoot = new File("test/DATA/INDEX/QUEUES");

    /** Data directory handed to {@link WorkTables}. */
    final File datadir = new File("test/DATA");

    /**
     * Test of reopen existing HostBalancer cache to test/demonstrate issue with
     * HostQueue for file: protocol (see http://mantis.tokeek.de/view.php?id=668).
     *
     * <p>The test pushes one {@code file:} url, closes the balancer, reopens it on
     * the same directory and checks that the url is still known and that pushing
     * it again is rejected as a double occurrence.
     *
     * <p>Every opened balancer is closed in a {@code finally} block so that a
     * failing assertion does not leave the queue directory open.
     *
     * @throws IOException            declared by the original test; raised by the
     *                                called project code
     * @throws SpaceExceededException declared by the original test; raised by the
     *                                called project code
     * @throws InterruptedException   if one of the waits is interrupted
     */
    @Test
    public void testReopen() throws IOException, SpaceExceededException, InterruptedException {
        boolean exceed134217727 = true;
        int onDemandLimit = 1000;
        String hostDir = "C:\\filedirectory";

        // prepare one url for push test
        String urlstr = "file:///" + hostDir;
        DigestURL url = new DigestURL(urlstr);
        Request req = new Request(url, null);

        deletedelete(queuesRoot); // start clean test

        // NOTE(review): the WorkTables/RobotsTxt instances are never closed here,
        // same as before - confirm whether they hold resources that need releasing.
        RobotsTxt rob;
        String res;

        // phase 1: fresh balancer, push the url twice
        HostBalancer hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727);
        try {
            Thread.sleep(100); // wait for file operation
            hb.clear();
            Thread.sleep(100);
            assertEquals("After clear", 0, hb.size());

            WorkTables wt = new WorkTables(datadir);
            rob = new RobotsTxt(wt, null);

            res = hb.push(req, null, rob); // push url
            assertNull(res); // should have no error text
            assertTrue(hb.has(url.hash())); // check existence
            assertEquals("first push of one url", 1, hb.size()); // expected size=1

            res = hb.push(req, null, rob); // push same url (should be rejected = double occurrence)
            assertNotNull(res); // should state double occurrence
            assertTrue(hb.has(url.hash()));
            assertEquals("second push of same url", 1, hb.size());
        } finally {
            hb.close(); // close, also when an assertion above failed
        }

        Thread.sleep(200); // wait a bit for file operation

        // phase 2: reopen balancer on the same directory
        hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727);
        try {
            Thread.sleep(200); // wait a bit for file operation

            assertEquals("size after reopen (with one existing url)", 1, hb.size()); // expect size=1 from previous push
            // check url exists (it fails as after reopen internal queue.hosthash is wrong)
            assertTrue("check existance of pushed url", hb.has(url.hash()));

            // push same url as before (should be rejected, but isn't due to hosthash mismatch after reopen)
            res = hb.push(req, null, rob);
            assertNotNull("should state double occurence", res);
            assertEquals("first push of same url after reopen", 1, hb.size()); // should stay size=1
            assertTrue("check existance of pushed url", hb.has(url.hash()));

            res = hb.push(req, null, rob);
            assertNotNull("should state double occurence", res);
            assertTrue("check existance of pushed url", hb.has(url.hash()));
            assertEquals("second push of same url after reopen", 1, hb.size()); // double check, should stay size=1

            // list all urls in hostbalancer
            Iterator<Request> it = hb.iterator();
            while (it.hasNext()) {
                Request rres = it.next();
                System.out.println(rres.toString());
            }
        } finally {
            hb.close(); // always release the reopened balancer
        }
    }
}