-implemented a specialized snippet-fetch for media content

-changed search result preparation for media search presentation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3073 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-12-12 02:09:25 +00:00
parent 694a6e4f44
commit e4570bffaf
8 changed files with 104 additions and 70 deletions

View File

@ -4,6 +4,7 @@ function removeAllChildren(element){
}
child=element.firstChild;
while(child!=null){
removeAllChildren(child);
element.removeChild(child);
child=element.firstChild;
}

View File

@ -10,14 +10,14 @@ function AllTextSnippets() {
}
}
function AllAudioSnippets() {
function AllMediaSnippets() {
var query = document.getElementsByName("former")[0].value;
var span = document.getElementsByTagName("span");
for(var x=0;x<span.length;x++) {
if (span[x].className == 'snippetLoading') {
var url = document.getElementById("url" + span[x].id);
requestAudioSnippet(url,query);
requestMediaSnippet(url,query);
}
}
}
@ -29,10 +29,10 @@ function requestTextSnippet(url, query){
request.send(null);
}
function requestAudioSnippet(url, query){
function requestMediaSnippet(url, query){
var request=createRequestObject();
request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&remove=true&media=audio&search=' + escape(query),true);
request.onreadystatechange = function () {handleAudioState(request)};
request.onreadystatechange = function () {handleMediaState(request)};
request.send(null);
}
@ -46,7 +46,6 @@ function handleTextState(req) {
var snippetText = response.getElementsByTagName("text")[0].firstChild.data;
var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data;
var status = response.getElementsByTagName("status")[0].firstChild.data;
var links = response.getElementsByTagName("links")[0].firstChild.data;
var span = document.getElementById(urlHash)
removeAllChildren(span);
@ -54,10 +53,8 @@ function handleTextState(req) {
if (status < 11) {
span.className = "snippetLoaded";
//span.setAttribute("class", "snippetLoaded");
} else {
span.className = "snippetError";
//span.setAttribute("class", "snippetError");
}
// replace "<b>" text by <strong> node
@ -79,37 +76,23 @@ function handleTextState(req) {
pos2=snippetText.indexOf("</b>");
}
if (links > 0) {
for (i = 0; i < links; i++) {
var type = response.getElementsByTagName("type")[i].firstChild.data;
var href = response.getElementsByTagName("href")[i].firstChild.data;
var name = response.getElementsByTagName("name")[i].firstChild.data;
var attr = response.getElementsByTagName("attr")[i].firstChild.data;
span.appendChild(document.createElement("br"));
var anchor = document.createElement("a");
var hrefattr = document.createAttribute("href");
hrefattr.nodeValue = href;
anchor.setAttributeNode(hrefattr);
anchor.appendChild(document.createTextNode(name));
span.appendChild(anchor);
}
}
// add remaining string
if (snippetText != "") {
span.appendChild(document.createTextNode(snippetText));
}
}
function handleAudioState(req) {
function handleMediaState(req) {
if(req.readyState != 4){
return;
}
var response = req.responseXML;
var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data;
var links = response.getElementsByTagName("links")[0].firstChild.data;
var span = document.getElementById(urlHash)
removeAllChildren(span);
var snippetText = "";
if (links > 0) {
span.className = "snippetLoaded";
for (i = 0; i < links; i++) {
@ -117,12 +100,39 @@ function handleAudioState(req) {
var href = response.getElementsByTagName("href")[i].firstChild.data;
var name = response.getElementsByTagName("name")[i].firstChild.data;
var attr = response.getElementsByTagName("attr")[i].firstChild.data;
var nameanchor = document.createElement("a");
nameanchor.setAttribute("href", href);
nameanchor.appendChild(document.createTextNode(name));
var linkanchor = document.createElement("a");
linkanchor.setAttribute("href", href);
linkanchor.appendChild(document.createTextNode(href));
var col1 = document.createElement("td");
var width1 = document.createAttribute("width");
width1.nodeValue = 200;
col1.setAttributeNode(width1);
col1.appendChild(nameanchor);
var col2 = document.createElement("td");
var width2 = document.createAttribute("width");
width2.nodeValue = 500;
col2.setAttributeNode(width2);
col2.appendChild(linkanchor);
var row = document.createElement("tr");
row.setAttribute("class", "TableCellDark");
row.appendChild(col1);
row.appendChild(col2);
var table = document.createElement("table");
table.appendChild(row);
span.appendChild(table);
}
} else {
span.className = "snippetError";
span.appendChild(document.createTextNode(""));
}
span.appendChild(document.createTextNode(snippetText));
}
function addHover() {

View File

@ -64,7 +64,7 @@ public class snippet {
// attach link information
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, false, 1000);
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000);
plasmaSnippetCache.MediaSnippet ms;
for (int i = 0; i < mediaSnippets.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i);
@ -73,7 +73,7 @@ public class snippet {
prop.put("link_" + i + "_name", ms.name);
prop.put("link_" + i + "_attr", ms.attr);
}
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS");
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
prop.put("link", mediaSnippets.size());
prop.put("links", mediaSnippets.size());

View File

@ -56,6 +56,7 @@ document.searchform.Enter.value = "search again - catch up more links";
</script>
#(type)#
<!-- type 0: text search -->
#(excluded)#
::
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
@ -144,7 +145,50 @@ You can enrich the search results by using the 'global' option; you must also sw
(by using the proxy) to contribute to the global index.
#(/resultbottomline)#
</p>
::<!-- type 1: media search -->
#(excluded)#
::
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
#(/excluded)#
#(num-results)#
::
<p>No Results.</p>
::
<p>No Results. (length of search words must be at least 3 characters)</p>
::
<p>No Results. If you think this is unsatisfactory then you may consider to support the global index by running your own proxy/peer.
If everybody contributes, the results will get better.</p>
<p>Other possible reasons for no result:</p>
<ul>
<li>The search time was too short. Search again with same query to catch up 'late peers'</li>
<li>There is currently no support for german umlaute. Please use ae/oe/ue instead</li>
<li>Words of length &lt; 3 are not indexed. Please omit such words</li>
<li>YaCy tries to index singular instead of plural words. Please use the singular form</li>
<li>Only complete words are indexed, not parts of words</li>
<li>Don't use stopwords as search words</li>
<li>During this test phase the reaction time of remote peers is unknown.
Please repeat your search to see if there are late-responses from remote peers</li>
</ul>
<p>If you think the information you searched should exist in the global index,
then please run your own peer and start a crawl of your wanted information to make it
available for everyone. Then stay online to support crawls from other peers. Thank you!</p>
::
<p><strong>#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links of a total number of <strong>#[totalcount]#</strong> known.</p>
#(/num-results)#
<!-- linklist begin -->
#{results}#
<!-- link begin -->
<span class="snippetLoading" id="#[urlhash]#">loading snippet from <a href="#[url]#" id="url#[urlhash]#">#[urlname]#</a><br /></span>
<!-- link end -->
#{/results}#
<script type="text/javascript">
AllMediaSnippets();
addHover();
</script>
<!-- linklist end -->
::<!-- type 2: image search: presents image thumbnails -->
::<!-- type 3: image thumbnail list for one single url -->
<table border="0" cellspacing="16" cellpadding="0">
#{results}#
<tr valign="bottom">
@ -157,39 +201,13 @@ You can enrich the search results by using the 'global' option; you must also sw
#{/results}#
</table>
#(/type)#
<p class="info">YaCy is a GPL'ed project
with the target of implementing a P2P-based global search engine.<br />
Architecture (C) by Michael Peter Christen,
<img src="/env/grafics/mcemailh.gif" alt="Mail-Adresse von Michael Peter Christen" /></p>
<p class="info">
YaCy is a GPL'ed project with the target of implementing a P2P-based global search engine.<br />
Architecture (C) by Michael Peter Christen, <img src="/env/grafics/mcemailh.gif" alt="Mail-Adresse von Michael Peter Christen" />
</p>
#(display)#
#%env/templates/simplefooter.template%#
::
<p><strong>Refine your search with these topwords</strong>:</p>
<p>
#{words}#
<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;order=#[order]#&amp;resource=#[resource]#&amp;time=#[time]#">#[word]#</a>
#{/words}#
</p>
#(/combine)#
<!-- linklist begin -->
#{results}#
<!-- link begin -->
<div class="searchresults">
<p class="snippet"><span class="#(snippet)#snippetLoading::snippetLoaded#(/snippet)#" id="#[urlhash]#">#(snippet)#loading snippet ...::#[text]##(/snippet)#</span></p>
</div>
<!-- link end -->
#{/results}#
<script type="text/javascript">
AllTextSnippets();
addHover();
</script>
<!-- linklist end -->
<p>
#(resultbottomline)#
::
#%env/templates/footer.template%#
#(/display)#

View File

@ -144,7 +144,7 @@ public class yacysearch {
}
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
final int count = Integer.parseInt(post.get("count", "10"));
int count = Integer.parseInt(post.get("count", "10"));
final String order = post.get("order", plasmaSearchPreOrder.canUseYBR() ? "YBR-Date-Quality" : "Date-Quality-YBR");
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
final boolean indexof = post.get("indexof","").equals("on");
@ -178,6 +178,9 @@ public class yacysearch {
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
// patch until better search profiles are available
if ((contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 50;
serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) {
@ -368,7 +371,7 @@ public class yacysearch {
}
}
prop.put("type", "0");
prop.put("type", (thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : 1);
prop.put("cat", "href");
prop.put("depth", "0");
@ -406,7 +409,7 @@ public class yacysearch {
}
prop.put("type_results", line);
prop.put("type", 1); // set type of result: image list
prop.put("type", 3); // set type of result: image list
prop.put("cat", "href");
prop.put("depth", depth);
}

View File

@ -172,8 +172,8 @@ public final class plasmaParser {
* @see #initMediaExt(String)
*/
static {
String apps = "sit,hqx,img,dmg,exe,com,bat,sh";
String audio = "mp2,mp3,ogg,aac,aif,aiff,wav";
String apps = "sit,hqx,img,dmg,exe,com,bat,sh,zip,jar";
String audio = "mp2,mp3,ogg,aac,aif,aiff,wav,ogg";
String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v";
String image = "jpg,jpeg,jpe,gif,png";
initMediaExt(extString2extList(

View File

@ -212,6 +212,8 @@ public class plasmaSnippetCache {
this.href = href;
this.name = name;
this.attr = attr;
if ((this.name == null) || (this.name.length() == 0)) this.name = "_";
if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_";
}
}
@ -623,12 +625,12 @@ public class plasmaSnippetCache {
//result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
s = removeAppearanceHashes(url, queryhashes);
if (s.size() == 0) {
result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
result.add(new MediaSnippet(mediatype, url, desc, null));
continue;
}
s = removeAppearanceHashes(desc, s);
if (s.size() == 0) {
result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
result.add(new MediaSnippet(mediatype, url, desc, null));
continue;
}
}
@ -651,12 +653,12 @@ public class plasmaSnippetCache {
//result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
s = removeAppearanceHashes(url, queryhashes);
if (s.size() == 0) {
result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
result.add(new MediaSnippet("image", url, desc, ientry.width() + " x " + ientry.height()));
continue;
}
s = removeAppearanceHashes(desc, s);
if (s.size() == 0) {
result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
result.add(new MediaSnippet("image", url, desc, ientry.width() + " x " + ientry.height()));
continue;
}
}

View File

@ -188,7 +188,7 @@ parseableMimeTypes.URLREDIRECTOR=
# a comma-separated list of extensions that denote media file formats
# this is important to recognize <a href> - tags as not-html reference
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
# Promotion Strings