-implemented a specialized snippet-fetch for media content

-changed search result preparation for media search presentation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3073 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-12-12 02:09:25 +00:00
parent 694a6e4f44
commit e4570bffaf
8 changed files with 104 additions and 70 deletions

View File

@ -4,6 +4,7 @@ function removeAllChildren(element){
} }
child=element.firstChild; child=element.firstChild;
while(child!=null){ while(child!=null){
removeAllChildren(child);
element.removeChild(child); element.removeChild(child);
child=element.firstChild; child=element.firstChild;
} }

View File

@ -10,14 +10,14 @@ function AllTextSnippets() {
} }
} }
function AllAudioSnippets() { function AllMediaSnippets() {
var query = document.getElementsByName("former")[0].value; var query = document.getElementsByName("former")[0].value;
var span = document.getElementsByTagName("span"); var span = document.getElementsByTagName("span");
for(var x=0;x<span.length;x++) { for(var x=0;x<span.length;x++) {
if (span[x].className == 'snippetLoading') { if (span[x].className == 'snippetLoading') {
var url = document.getElementById("url" + span[x].id); var url = document.getElementById("url" + span[x].id);
requestAudioSnippet(url,query); requestMediaSnippet(url,query);
} }
} }
} }
@ -29,10 +29,10 @@ function requestTextSnippet(url, query){
request.send(null); request.send(null);
} }
function requestAudioSnippet(url, query){ function requestMediaSnippet(url, query){
var request=createRequestObject(); var request=createRequestObject();
request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&remove=true&media=audio&search=' + escape(query),true); request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&remove=true&media=audio&search=' + escape(query),true);
request.onreadystatechange = function () {handleAudioState(request)}; request.onreadystatechange = function () {handleMediaState(request)};
request.send(null); request.send(null);
} }
@ -46,7 +46,6 @@ function handleTextState(req) {
var snippetText = response.getElementsByTagName("text")[0].firstChild.data; var snippetText = response.getElementsByTagName("text")[0].firstChild.data;
var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data; var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data;
var status = response.getElementsByTagName("status")[0].firstChild.data; var status = response.getElementsByTagName("status")[0].firstChild.data;
var links = response.getElementsByTagName("links")[0].firstChild.data;
var span = document.getElementById(urlHash) var span = document.getElementById(urlHash)
removeAllChildren(span); removeAllChildren(span);
@ -54,10 +53,8 @@ function handleTextState(req) {
if (status < 11) { if (status < 11) {
span.className = "snippetLoaded"; span.className = "snippetLoaded";
//span.setAttribute("class", "snippetLoaded");
} else { } else {
span.className = "snippetError"; span.className = "snippetError";
//span.setAttribute("class", "snippetError");
} }
// replace "<b>" text by <strong> node // replace "<b>" text by <strong> node
@ -79,37 +76,23 @@ function handleTextState(req) {
pos2=snippetText.indexOf("</b>"); pos2=snippetText.indexOf("</b>");
} }
if (links > 0) {
for (i = 0; i < links; i++) {
var type = response.getElementsByTagName("type")[i].firstChild.data;
var href = response.getElementsByTagName("href")[i].firstChild.data;
var name = response.getElementsByTagName("name")[i].firstChild.data;
var attr = response.getElementsByTagName("attr")[i].firstChild.data;
span.appendChild(document.createElement("br"));
var anchor = document.createElement("a");
var hrefattr = document.createAttribute("href");
hrefattr.nodeValue = href;
anchor.setAttributeNode(hrefattr);
anchor.appendChild(document.createTextNode(name));
span.appendChild(anchor);
}
}
// add remaining string // add remaining string
if (snippetText != "") { if (snippetText != "") {
span.appendChild(document.createTextNode(snippetText)); span.appendChild(document.createTextNode(snippetText));
} }
} }
function handleAudioState(req) { function handleMediaState(req) {
if(req.readyState != 4){ if(req.readyState != 4){
return; return;
} }
var response = req.responseXML; var response = req.responseXML;
var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data;
var links = response.getElementsByTagName("links")[0].firstChild.data; var links = response.getElementsByTagName("links")[0].firstChild.data;
var span = document.getElementById(urlHash)
var snippetText = ""; removeAllChildren(span);
if (links > 0) { if (links > 0) {
span.className = "snippetLoaded"; span.className = "snippetLoaded";
for (i = 0; i < links; i++) { for (i = 0; i < links; i++) {
@ -117,12 +100,39 @@ function handleAudioState(req) {
var href = response.getElementsByTagName("href")[i].firstChild.data; var href = response.getElementsByTagName("href")[i].firstChild.data;
var name = response.getElementsByTagName("name")[i].firstChild.data; var name = response.getElementsByTagName("name")[i].firstChild.data;
var attr = response.getElementsByTagName("attr")[i].firstChild.data; var attr = response.getElementsByTagName("attr")[i].firstChild.data;
var nameanchor = document.createElement("a");
nameanchor.setAttribute("href", href);
nameanchor.appendChild(document.createTextNode(name));
var linkanchor = document.createElement("a");
linkanchor.setAttribute("href", href);
linkanchor.appendChild(document.createTextNode(href));
var col1 = document.createElement("td");
var width1 = document.createAttribute("width");
width1.nodeValue = 200;
col1.setAttributeNode(width1);
col1.appendChild(nameanchor);
var col2 = document.createElement("td");
var width2 = document.createAttribute("width");
width2.nodeValue = 500;
col2.setAttributeNode(width2);
col2.appendChild(linkanchor);
var row = document.createElement("tr");
row.setAttribute("class", "TableCellDark");
row.appendChild(col1);
row.appendChild(col2);
var table = document.createElement("table");
table.appendChild(row);
span.appendChild(table);
} }
} else { } else {
span.className = "snippetError"; span.className = "snippetError";
span.appendChild(document.createTextNode(""));
} }
span.appendChild(document.createTextNode(snippetText));
} }
function addHover() { function addHover() {

View File

@ -64,7 +64,7 @@ public class snippet {
// attach link information // attach link information
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, false, 1000); ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000);
plasmaSnippetCache.MediaSnippet ms; plasmaSnippetCache.MediaSnippet ms;
for (int i = 0; i < mediaSnippets.size(); i++) { for (int i = 0; i < mediaSnippets.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i); ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i);
@ -73,7 +73,7 @@ public class snippet {
prop.put("link_" + i + "_name", ms.name); prop.put("link_" + i + "_name", ms.name);
prop.put("link_" + i + "_attr", ms.attr); prop.put("link_" + i + "_attr", ms.attr);
} }
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS"); System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
prop.put("link", mediaSnippets.size()); prop.put("link", mediaSnippets.size());
prop.put("links", mediaSnippets.size()); prop.put("links", mediaSnippets.size());

View File

@ -56,6 +56,7 @@ document.searchform.Enter.value = "search again - catch up more links";
</script> </script>
#(type)# #(type)#
<!-- type 0: text search -->
#(excluded)# #(excluded)#
:: ::
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p> <p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
@ -144,7 +145,50 @@ You can enrich the search results by using the 'global' option; you must also sw
(by using the proxy) to contribute to the global index. (by using the proxy) to contribute to the global index.
#(/resultbottomline)# #(/resultbottomline)#
</p> </p>
::<!-- type 1: media search -->
#(excluded)#
:: ::
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
#(/excluded)#
#(num-results)#
::
<p>No Results.</p>
::
<p>No Results. (length of search words must be at least 3 characters)</p>
::
<p>No Results. If you think this is unsatisfactory then you may consider to support the global index by running your own proxy/peer.
If everybody contributes, the results will get better.</p>
<p>Other possible reasons for no result:</p>
<ul>
<li>The search time was too short. Search again with same query to catch up 'late peers'</li>
<li>There is currently no support for german umlaute. Please use ae/oe/ue instead</li>
<li>Words of length &lt; 3 are not indexed. Please omit such words</li>
<li>YaCy tries to index singular instead of plural words. Please use the singular form</li>
<li>Only complete words are indexed, not parts of words</li>
<li>Don't use stopwords as search words</li>
<li>During this test phase the reaction time of remote peers is unknown.
Please repeat your search to see if there are late-responses from remote peers</li>
</ul>
<p>If you think the information you searched should exist in the global index,
then please run your own peer and start a crawl of your wanted information to make it
available for everyone. Then stay online to support crawls from other peers. Thank you!</p>
::
<p><strong>#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links of a total number of <strong>#[totalcount]#</strong> known.</p>
#(/num-results)#
<!-- linklist begin -->
#{results}#
<!-- link begin -->
<span class="snippetLoading" id="#[urlhash]#">loading snippet from <a href="#[url]#" id="url#[urlhash]#">#[urlname]#</a><br /></span>
<!-- link end -->
#{/results}#
<script type="text/javascript">
AllMediaSnippets();
addHover();
</script>
<!-- linklist end -->
::<!-- type 2: image search: presents image thumbnails -->
::<!-- type 3: image thumbnail list for one single url -->
<table border="0" cellspacing="16" cellpadding="0"> <table border="0" cellspacing="16" cellpadding="0">
#{results}# #{results}#
<tr valign="bottom"> <tr valign="bottom">
@ -157,39 +201,13 @@ You can enrich the search results by using the 'global' option; you must also sw
#{/results}# #{/results}#
</table> </table>
#(/type)# #(/type)#
<p class="info">YaCy is a GPL'ed project <p class="info">
with the target of implementing a P2P-based global search engine.<br /> YaCy is a GPL'ed project with the target of implementing a P2P-based global search engine.<br />
Architecture (C) by Michael Peter Christen, Architecture (C) by Michael Peter Christen, <img src="/env/grafics/mcemailh.gif" alt="Mail-Adresse von Michael Peter Christen" />
<img src="/env/grafics/mcemailh.gif" alt="Mail-Adresse von Michael Peter Christen" /></p> </p>
#(display)# #(display)#
#%env/templates/simplefooter.template%# #%env/templates/simplefooter.template%#
:: ::
<p><strong>Refine your search with these topwords</strong>:</p>
<p>
#{words}#
<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;order=#[order]#&amp;resource=#[resource]#&amp;time=#[time]#">#[word]#</a>
#{/words}#
</p>
#(/combine)#
<!-- linklist begin -->
#{results}#
<!-- link begin -->
<div class="searchresults">
<p class="snippet"><span class="#(snippet)#snippetLoading::snippetLoaded#(/snippet)#" id="#[urlhash]#">#(snippet)#loading snippet ...::#[text]##(/snippet)#</span></p>
</div>
<!-- link end -->
#{/results}#
<script type="text/javascript">
AllTextSnippets();
addHover();
</script>
<!-- linklist end -->
<p>
#(resultbottomline)#
::
#%env/templates/footer.template%# #%env/templates/footer.template%#
#(/display)# #(/display)#

View File

@ -144,7 +144,7 @@ public class yacysearch {
} }
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {} if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
final int count = Integer.parseInt(post.get("count", "10")); int count = Integer.parseInt(post.get("count", "10"));
final String order = post.get("order", plasmaSearchPreOrder.canUseYBR() ? "YBR-Date-Quality" : "Date-Quality-YBR"); final String order = post.get("order", plasmaSearchPreOrder.canUseYBR() ? "YBR-Date-Quality" : "Date-Quality-YBR");
boolean global = (post == null) ? true : post.get("resource", "global").equals("global"); boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
final boolean indexof = post.get("indexof","").equals("on"); final boolean indexof = post.get("indexof","").equals("on");
@ -178,6 +178,9 @@ public class yacysearch {
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE; if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP; if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
// patch until better search profiles are available
if ((contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 50;
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) { if (post.get("cat", "href").equals("href")) {
@ -368,7 +371,7 @@ public class yacysearch {
} }
} }
prop.put("type", "0"); prop.put("type", (thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : 1);
prop.put("cat", "href"); prop.put("cat", "href");
prop.put("depth", "0"); prop.put("depth", "0");
@ -406,7 +409,7 @@ public class yacysearch {
} }
prop.put("type_results", line); prop.put("type_results", line);
prop.put("type", 1); // set type of result: image list prop.put("type", 3); // set type of result: image list
prop.put("cat", "href"); prop.put("cat", "href");
prop.put("depth", depth); prop.put("depth", depth);
} }

View File

@ -172,8 +172,8 @@ public final class plasmaParser {
* @see #initMediaExt(String) * @see #initMediaExt(String)
*/ */
static { static {
String apps = "sit,hqx,img,dmg,exe,com,bat,sh"; String apps = "sit,hqx,img,dmg,exe,com,bat,sh,zip,jar";
String audio = "mp2,mp3,ogg,aac,aif,aiff,wav"; String audio = "mp2,mp3,ogg,aac,aif,aiff,wav,ogg";
String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v"; String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v";
String image = "jpg,jpeg,jpe,gif,png"; String image = "jpg,jpeg,jpe,gif,png";
initMediaExt(extString2extList( initMediaExt(extString2extList(

View File

@ -212,6 +212,8 @@ public class plasmaSnippetCache {
this.href = href; this.href = href;
this.name = name; this.name = name;
this.attr = attr; this.attr = attr;
if ((this.name == null) || (this.name.length() == 0)) this.name = "_";
if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_";
} }
} }
@ -623,12 +625,12 @@ public class plasmaSnippetCache {
//result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null)); //result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
s = removeAppearanceHashes(url, queryhashes); s = removeAppearanceHashes(url, queryhashes);
if (s.size() == 0) { if (s.size() == 0) {
result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null)); result.add(new MediaSnippet(mediatype, url, desc, null));
continue; continue;
} }
s = removeAppearanceHashes(desc, s); s = removeAppearanceHashes(desc, s);
if (s.size() == 0) { if (s.size() == 0) {
result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null)); result.add(new MediaSnippet(mediatype, url, desc, null));
continue; continue;
} }
} }
@ -651,12 +653,12 @@ public class plasmaSnippetCache {
//result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height())); //result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
s = removeAppearanceHashes(url, queryhashes); s = removeAppearanceHashes(url, queryhashes);
if (s.size() == 0) { if (s.size() == 0) {
result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height())); result.add(new MediaSnippet("image", url, desc, ientry.width() + " x " + ientry.height()));
continue; continue;
} }
s = removeAppearanceHashes(desc, s); s = removeAppearanceHashes(desc, s);
if (s.size() == 0) { if (s.size() == 0) {
result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height())); result.add(new MediaSnippet("image", url, desc, ientry.width() + " x " + ientry.height()));
continue; continue;
} }
} }

View File

@ -188,7 +188,7 @@ parseableMimeTypes.URLREDIRECTOR=
# a comma-separated list of extensions that denote media file formats # a comma-separated list of extensions that denote media file formats
# this is important to recognize <a href> - tags as not-html reference # this is important to recognize <a href> - tags as not-html reference
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_ # These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
# Promotion Strings # Promotion Strings