mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Also handle text content when parsing XML within limits.
This commit is contained in:
parent
f38fb7f02c
commit
acab6a6def
|
@ -193,11 +193,17 @@ public class GenericXMLParser extends AbstractParser implements Parser {
|
|||
} catch(StreamLimitException e) {
|
||||
limitExceeded = true;
|
||||
}
|
||||
|
||||
if (writer.isOverflow()) {
|
||||
throw new Parser.Failure("Not enough Memory available for generic the XML parser : "
|
||||
+ Formatter.bytesToString(availableMemory), location);
|
||||
}
|
||||
|
||||
|
||||
/* create the parsed document with empty text content */
|
||||
/* Create the parsed document, possibly with only part of the text and links when the limit was exceeded */
|
||||
final byte[] contentBytes = UTF8.getBytes(writer.toString());
|
||||
Document[] docs = new Document[] { new Document(location, mimeType, detectedCharset, this, null, null, null, null, "",
|
||||
null, null, 0.0d, 0.0d, new byte[0], detectedURLs, null, null, false, new Date()) };
|
||||
null, null, 0.0d, 0.0d, contentBytes, detectedURLs, null, null, false, new Date()) };
|
||||
docs[0].setPartiallyParsed(limitExceeded);
|
||||
return docs;
|
||||
} catch (final Exception e) {
|
||||
|
|
|
@ -390,6 +390,8 @@ public class GenericXMLParserTest {
|
|||
assertEquals(1, documents.length);
|
||||
assertFalse(documents[0].isPartiallyParsed());
|
||||
|
||||
assertTrue(documents[0].getTextString().contains("And this is a relative link"));
|
||||
|
||||
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
|
||||
assertNotNull(detectedAnchors);
|
||||
assertEquals(5, detectedAnchors.size());
|
||||
|
@ -410,6 +412,9 @@ public class GenericXMLParserTest {
|
|||
assertEquals(1, documents.length);
|
||||
assertTrue(documents[0].isPartiallyParsed());
|
||||
|
||||
assertTrue(documents[0].getTextString().contains("Home page"));
|
||||
assertFalse(documents[0].getTextString().contains("And this is a relative link"));
|
||||
|
||||
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
|
||||
assertNotNull(detectedAnchors);
|
||||
assertEquals(2, detectedAnchors.size());
|
||||
|
@ -447,6 +452,9 @@ public class GenericXMLParserTest {
|
|||
assertEquals(1, documents.length);
|
||||
assertTrue(documents[0].isPartiallyParsed());
|
||||
|
||||
assertTrue(documents[0].getTextString().contains("and this is a mention to a relative URL"));
|
||||
assertFalse(documents[0].getTextString().contains("And this is a relative link to another"));
|
||||
|
||||
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
|
||||
assertNotNull(detectedAnchors);
|
||||
assertEquals(3, detectedAnchors.size());
|
||||
|
|
Loading…
Reference in New Issue
Block a user