yacy_search_server/htroot/ViewFile.java

419 lines
18 KiB
Java
Raw Normal View History

//ViewFile.java
//-----------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//last major change: 12.07.2004
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//you must compile this file with
//javac -classpath .:../Classes Status.java
//if the shell's current path is HTROOT
*) plasmaHTCache: - method loadResourceContent defined as deprecated. Please do not use this function to avoid OutOfMemory Exceptions when loading large files - new function getResourceContentStream to get an inputstream of a cache file - new function getResourceContentLength to get the size of a cached file *) httpc.java: - Bugfix: resource content was loaded into memory even if this was not requested *) Crawler: - new option to hold loaded resource content in memory - adding option to use the worker class without the worker pool (needed by the snippet fetcher) *) plasmaSnippetCache - snippet loader does not use a crawl-worker from pool but uses a newly created instance to avoid blocking by normal crawling activity. - now operates on streams instead of byte arrays to avoid OutOfMemory Exceptions when operating on large files - snippet loader now forces the crawl-worker to keep the loaded resource in memory to avoid IO *) plasmaCondenser: adding new function getWords that can directly operate on input streams *) Parsers - keep resource in memory whenever possible (to avoid IO) - when parsing from stream the content length must be passed to the parser function now. this length value is needed by the parsers to decide if the parsed resource content is to large to hold it in memory and must be stored to file - AbstractParser.java: new function to pass the contentLength of a resource to the parsers git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2701 6c8d7289-2bf4-0310-a012-ef5d649a1542
2006-10-03 13:05:48 +02:00
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
import de.anomic.kelondro.text.Document;
import de.anomic.kelondro.text.MetadataRowContainer;
import de.anomic.kelondro.text.URLMetadata;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class ViewFile {
public static final int VIEW_MODE_NO_TEXT = 0;
public static final int VIEW_MODE_AS_PLAIN_TEXT = 1;
public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
public static final int VIEW_MODE_AS_IFRAME = 4;
public static final int VIEW_MODE_AS_LINKLIST = 5;
public static final int VIEW_MODE_AS_PARSED_WORDS = 6;
private static final String HIGHLIGHT_CSS = "searchHighlight";
private static final int MAX_HIGHLIGHTS = 6;
public static serverObjects respond(final httpRequestHeader header, final serverObjects post, final serverSwitch<?> env) {
final serverObjects prop = new serverObjects();
final plasmaSwitchboard sb = (plasmaSwitchboard)env;
final int display = (post == null) ? 0 : post.getInt("display", 0);
prop.put("display", display);
prop.put("error_display", display);
if (post != null && post.containsKey("words"))
prop.putHTML("error_words", post.get("words"));
else {
prop.putHTML("error_words", "");
}
final String viewMode = post.get("viewMode","sentences");
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error_vMode-" + viewMode, "1");
yacyURL url = null;
String descr = "";
final int wordCount = 0;
int size = 0;
boolean pre = false;
// getting the url hash from which the content should be loaded
final String urlHash = post.get("urlHash","");
if (urlHash.length() > 0) {
// getting the urlEntry that belongs to the url hash
MetadataRowContainer urlEntry = null;
urlEntry = sb.webIndex.getURL(urlHash, null, 0);
if (urlEntry == null) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "2");
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
// getting the url that belongs to the entry
final URLMetadata metadata = urlEntry.metadata();
if ((metadata == null) || (metadata.url() == null)) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "3");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
url = metadata.url();
descr = metadata.dc_title();
urlEntry.wordCount();
size = urlEntry.size();
pre = urlEntry.flags().get(plasmaCondenser.flag_cat_indexof);
}
// alternatively, get the url simply from a url String
// this can be used as a simple tool to test the text parser
final String urlString = post.get("url", "");
if (urlString.length() > 0) try {
// this call forces the peer to download web pages
// it is therefore protected by the admin password
if (!sb.verifyAuthentication(header, false)) {
prop.put("AUTHENTICATE", "admin log-in"); // force log-in
return prop;
}
// define an url by post parameter
url = new yacyURL(urlString, null);
pre = post.get("pre", "false").equals("true");
} catch (final MalformedURLException e) {}
if (url == null) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "1");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
// loading the resource content as byte array
InputStream resource = null;
long resourceLength = -1;
httpResponseHeader responseHeader = null;
String resMime = null;
// trying to load the resource body
resource = plasmaHTCache.getResourceContentStream(url);
resourceLength = plasmaHTCache.getResourceContentLength(url);
try {
responseHeader = plasmaHTCache.loadResponseHeader(url);
} catch (IllegalAccessException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
// if the resource body was not cached we try to load it from web
if (resource == null) {
Document entry = null;
try {
entry = sb.crawlQueues.loadResourceFromWeb(url, 10000, false, true, false);
} catch (final Exception e) {
prop.put("error", "4");
prop.putHTML("error_errorText", e.getMessage());
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
if (entry != null) {
resource = plasmaHTCache.getResourceContentStream(url);
resourceLength = plasmaHTCache.getResourceContentLength(url);
}
if (resource == null) {
prop.put("error", "4");
prop.put("error_errorText", "No resource available");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
}
// try to load resource metadata
if (responseHeader == null) {
// try to load the metadata from cache
try {
responseHeader = plasmaHTCache.loadResponseHeader(url);
} catch (final Exception e) {
/* ignore this */
}
// if the metadata was not cached try to load it from web
if (responseHeader == null) {
final String protocol = url.getProtocol();
if (!((protocol.equals("http") || protocol.equals("https")))) {
prop.put("error", "6");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
responseHeader = httpClient.whead(url.toString());
if (responseHeader == null) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "4");
prop.put("error_errorText", "Unable to load resource metadata.");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
resMime = responseHeader.mime();
}
} else {
resMime = responseHeader.mime();
}
final String[] wordArray = wordArray(post.get("words", null));
if (viewMode.equals("plain")) {
// TODO: how to handle very large files here ?
String content;
try {
content = new String(FileUtils.read(resource), "UTF-8");
} catch (final Exception e) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "4");
prop.putHTML("error_errorText", e.getMessage());
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
} finally {
if (resource != null)
try {
resource.close();
} catch (final Exception e) {
/* ignore this */
}
}
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "0");
prop.put("viewMode", VIEW_MODE_AS_PLAIN_TEXT);
prop.put("viewMode_plainText", markup(wordArray, content).replaceAll("\n", "<br />").replaceAll("\t", "&nbsp;&nbsp;&nbsp;&nbsp;"));
} else if (viewMode.equals("iframe")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url", url.toNormalform(false, true));
} else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("words") || viewMode.equals("links")) {
// parsing the resource content
plasmaParserDocument document = null;
try {
document = plasmaSnippetCache.parseDocument(url, resourceLength, resource);
if (document == null) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "5");
prop.put("error_errorText", "Unknown error");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
} catch (final ParserException e) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "5");
prop.putHTML("error_errorText", e.getMessage());
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
} finally {
if (resource != null)
try {
resource.close();
} catch (final Exception e) {
/* ignore this */
}
}
resMime = document.dc_format();
if (viewMode.equals("parsed")) {
final String content = new String(document.getTextBytes());
// content = wikiCode.replaceHTML(content); // added by Marc Nause
prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText", markup(wordArray, content).replaceAll("\n", "<br />").replaceAll("\t", "&nbsp;&nbsp;&nbsp;&nbsp;"));
} else if (viewMode.equals("sentences")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
final Iterator<StringBuilder> sentences = document.getSentences(pre);
boolean dark = true;
int i = 0;
String sentence;
if (sentences != null) {
// Search word highlighting
while (sentences.hasNext()) {
sentence = sentences.next().toString();
if (sentence.trim().length() > 0) {
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("viewMode_sentences_" + i + "_nr", i + 1);
prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, sentence));
prop.put("viewMode_sentences_" + i + "_dark", dark ? "1" : "0");
dark = !dark;
i++;
}
}
}
prop.put("viewMode_sentences", i);
} else if (viewMode.equals("words")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_WORDS);
final Iterator<StringBuilder> sentences = document.getSentences(pre);
boolean dark = true;
int i = 0;
String sentence, token;
if (sentences != null) {
// Search word highlighting
while (sentences.hasNext()) {
sentence = sentences.next().toString();
Enumeration<StringBuilder> tokens = plasmaCondenser.wordTokenizer(sentence, "UTF-8");
while (tokens.hasMoreElements()) {
token = tokens.nextElement().toString();
if (token.length() > 0) {
prop.put("viewMode_words_" + i + "_nr", i + 1);
prop.put("viewMode_words_" + i + "_word", token);
prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
dark = !dark;
i++;
}
}
}
}
prop.put("viewMode_words", i);
} else if (viewMode.equals("links")) {
prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
boolean dark = true;
int i = 0;
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
dark = (i % 2 == 0);
final HashMap<String, htmlFilterImageEntry> ts = document.getImages();
final Iterator<htmlFilterImageEntry> tsi = ts.values().iterator();
htmlFilterImageEntry entry;
while (tsi.hasNext()) {
entry = tsi.next();
prop.put("viewMode_links_" + i + "_nr", i);
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
prop.put("viewMode_links_" + i + "_type", "image");
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("viewMode_links_" + i + "_text", markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
if (entry.width() > 0 && entry.height() > 0)
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("viewMode_links_" + i + "_attr", entry.width() + "x" + entry.height() + " Pixel");
else
prop.put("viewMode_links_" + i + "_attr", "unknown");
dark = !dark;
i++;
}
i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getHyperlinks(), "href", (i % 2 == 0));
prop.put("viewMode_links", i);
}
if (document != null) document.close();
}
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error", "0");
prop.put("error_url", url.toNormalform(false, true));
prop.put("error_hash", urlHash);
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.put("error_wordCount", wordCount);
prop.putHTML("error_desc", descr);
prop.putNum("error_size", size);
prop.put("error_mimeTypeAvailable", (resMime == null) ? "0" : "1");
prop.put("error_mimeTypeAvailable_mimeType", resMime);
return prop;
}
private static final String[] wordArray(String words) {
String[] w = new String[0];
if (words == null || words.length() == 0) return w;
try {
words = URLDecoder.decode(words, "UTF-8");
w = words.substring(1, words.length() - 1).split(",");
} catch (final UnsupportedEncodingException e) {}
return w;
}
private static final String markup(final String[] wordArray, String message) {
message = htmlFilterCharacterCoding.unicode2html(message, true);
if (wordArray != null)
for (int j = 0; j < wordArray.length; j++) {
final String currentWord = wordArray[j].trim();
// TODO: replace upper-/lowercase words as well
message = message.replaceAll(currentWord,
"<span class=\"" + HIGHLIGHT_CSS + ((j % MAX_HIGHLIGHTS) + 1) + "\">" +
currentWord +
"</span>");
}
return message;
}
private static int putMediaInfo(final serverObjects prop, final String[] wordArray, int c, final Map<yacyURL, String> media, final String name, boolean dark) {
final Iterator<Map.Entry<yacyURL, String>> mi = media.entrySet().iterator();
Map.Entry<yacyURL, String> entry;
int i = 0;
while (mi.hasNext()) {
entry = mi.next();
prop.put("viewMode_links_" + c + "_nr", c);
prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0));
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.putHTML("viewMode_links_" + c + "_type", name);
prop.put("viewMode_links_" + c + "_text", markup(wordArray, entry.getValue()));
prop.put("viewMode_links_" + c + "_link", markup(wordArray, entry.getKey().toNormalform(true, false)));
prop.put("viewMode_links_" + c + "_url", entry.getKey().toNormalform(true, false));
* Complete number localization and provide a more reasonable interface to serverObjects: - put(key, value) methods are now used if a value added to the map should be kept as it is. Numbers are transformed (but not formatted) to an equivalent String representation. - putASIS(...) have been removed, now done with simple put(...) (see above). - puNum(...) can be used for number values which should be stored in a formatted way, either depending on the current locale setting for yacy (default) or in a "none" locale (see javadocs and setLocalize()). - putHTML(...) escapes special characters into corresponding HTML enities ('<' => '&lt;') which was done with put(...) before and so was called too often, becauses it is necessary only for very few cases. Additionally there is a "forXML" mode which only replaces < > & ". In short: Use put(...) for almost everything, use putXY(...) if you need some special transformation of the value. A few bugs have been fixed as well, and there should be a small performance improvement for complex pages with a lot of values. * added additional Sum/Avg rows to access tracker pages, see http://forum.yacy-websuche.de/viewtopic.php?f=5&t=456 * removed duplicate code (mostly related to the big changes above). TODO: - make sure, number formats work as expected _everywhere_, report overseen stuff http://forum.yacy-websuche.de/viewtopic.php?f=5&t=437 - probably a good idea to add special putDate() methods as they are used in many pages and create duplicated formatting code + maybe some centralized handling for memory value formatting. - further improve the speed of page creation for the WatchCrawler. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4178 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-10-24 23:38:19 +02:00
prop.putHTML("viewMode_links_" + c + "_attr", "");
dark = !dark;
c++;
i++;
}
return i;
}
}