mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Merge branch 'master' of git@gitorious.org:yacy/rc1.git
This commit is contained in:
commit
2fd8a0ead6
|
@ -520,7 +520,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
|
|||
if (rel.length() == 0) rel = "nofollow"; else if (rel.indexOf("nofollow") < 0) rel += ",nofollow";
|
||||
tag.opts.put("rel", rel);
|
||||
}
|
||||
tag.opts.put("text", new String(tag.content.getChars()));
|
||||
tag.opts.put("text", stripAllTags(tag.content.getChars())); // strip any inline html in tag text like "<a ...> <span>test</span> </a>"
|
||||
tag.opts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
|
||||
url.setAll(tag.opts);
|
||||
recursiveParse(url, tag.content.getChars());
|
||||
|
|
|
@ -5,8 +5,6 @@ import java.io.IOException;
|
|||
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
|
||||
import net.yacy.search.schema.CollectionSchema;
|
||||
import net.yacy.search.schema.WebgraphSchema;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
@ -57,12 +55,8 @@ public class EmbeddedSolrConnectorTest {
|
|||
|
||||
System.out.println("query solr");
|
||||
long expResult = 1;
|
||||
SolrDocumentList result;
|
||||
try {
|
||||
result = solr.getDocumentListByQuery(CollectionSchema.text_t.name() + ":tempor", 0, 10,"");
|
||||
assertEquals(expResult, result.getNumFound());
|
||||
} catch (final IOException ex) {
|
||||
fail("Solr query no result");
|
||||
}
|
||||
long result = solr.getCountByQuery(CollectionSchema.text_t.name() + ":tempor");
|
||||
System.out.println("found = " + result + " (expected = 1 )");
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,11 +5,15 @@ import java.io.FileInputStream;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.List;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.assertTrue;
|
||||
import junit.framework.TestCase;
|
||||
import net.yacy.cora.document.id.AnchorURL;
|
||||
import net.yacy.document.Document;
|
||||
import net.yacy.document.Parser;
|
||||
import net.yacy.document.parser.html.ContentScraper;
|
||||
import static net.yacy.document.parser.htmlParser.parseToScraper;
|
||||
import org.junit.Test;
|
||||
|
||||
public class htmlParserTest extends TestCase {
|
||||
|
@ -80,4 +84,35 @@ public class htmlParserTest extends TestCase {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of parseToScraper method, of class htmlParser.
|
||||
*/
|
||||
@Test
|
||||
public void testParseToScraper_4args() throws Exception {
|
||||
// test link with inline html in text
|
||||
// expected: the anchor text is delivered as pure text (inline html stripped), because it may be indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt
|
||||
final AnchorURL url = new AnchorURL("http://localhost/");
|
||||
final String mimetype = "text/html";
|
||||
final String testhtml = "<html><body>"
|
||||
+ "<a href='x1.html'><span>testtext</span></a>" // "testtext"
|
||||
+ "<a href=\"http://localhost/x2.html\"> <i id=\"home-icon\" class=\"img-sprite\"></i>Start</a>" // "Start"
|
||||
+ "<a href='x1.html'><span class='button'><img src='pic.gif'/></span></a>" // "" + image
|
||||
+ "</body></html>";
|
||||
|
||||
ContentScraper scraper = parseToScraper(url, mimetype, testhtml, 10);
|
||||
List<AnchorURL> anchorlist = scraper.getAnchors();
|
||||
|
||||
String linktxt = anchorlist.get(0).getTextProperty();
|
||||
assertEquals("testtext", linktxt);
|
||||
|
||||
linktxt = anchorlist.get(1).getTextProperty();
|
||||
assertEquals("Start", linktxt);
|
||||
|
||||
linktxt = anchorlist.get(2).getTextProperty();
|
||||
assertEquals("", linktxt);
|
||||
|
||||
int cnt = scraper.getImages().size();
|
||||
assertEquals(1,cnt);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user