mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- implemented a specialized snippet-fetch for media content
- changed search result preparation for media search presentation
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3073 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
694a6e4f44
commit
e4570bffaf
|
@ -4,6 +4,7 @@ function removeAllChildren(element){
|
|||
}
|
||||
child=element.firstChild;
|
||||
while(child!=null){
|
||||
removeAllChildren(child);
|
||||
element.removeChild(child);
|
||||
child=element.firstChild;
|
||||
}
|
||||
|
|
|
@ -10,14 +10,14 @@ function AllTextSnippets() {
|
|||
}
|
||||
}
|
||||
|
||||
function AllAudioSnippets() {
|
||||
function AllMediaSnippets() {
|
||||
var query = document.getElementsByName("former")[0].value;
|
||||
|
||||
var span = document.getElementsByTagName("span");
|
||||
for(var x=0;x<span.length;x++) {
|
||||
if (span[x].className == 'snippetLoading') {
|
||||
var url = document.getElementById("url" + span[x].id);
|
||||
requestAudioSnippet(url,query);
|
||||
requestMediaSnippet(url,query);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -29,10 +29,10 @@ function requestTextSnippet(url, query){
|
|||
request.send(null);
|
||||
}
|
||||
|
||||
function requestAudioSnippet(url, query){
|
||||
function requestMediaSnippet(url, query){
|
||||
var request=createRequestObject();
|
||||
request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&remove=true&media=audio&search=' + escape(query),true);
|
||||
request.onreadystatechange = function () {handleAudioState(request)};
|
||||
request.onreadystatechange = function () {handleMediaState(request)};
|
||||
request.send(null);
|
||||
}
|
||||
|
||||
|
@ -46,7 +46,6 @@ function handleTextState(req) {
|
|||
var snippetText = response.getElementsByTagName("text")[0].firstChild.data;
|
||||
var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data;
|
||||
var status = response.getElementsByTagName("status")[0].firstChild.data;
|
||||
var links = response.getElementsByTagName("links")[0].firstChild.data;
|
||||
|
||||
var span = document.getElementById(urlHash)
|
||||
removeAllChildren(span);
|
||||
|
@ -54,10 +53,8 @@ function handleTextState(req) {
|
|||
|
||||
if (status < 11) {
|
||||
span.className = "snippetLoaded";
|
||||
//span.setAttribute("class", "snippetLoaded");
|
||||
} else {
|
||||
span.className = "snippetError";
|
||||
//span.setAttribute("class", "snippetError");
|
||||
}
|
||||
|
||||
// replace "<b>" text by <strong> node
|
||||
|
@ -79,37 +76,23 @@ function handleTextState(req) {
|
|||
pos2=snippetText.indexOf("</b>");
|
||||
}
|
||||
|
||||
if (links > 0) {
|
||||
for (i = 0; i < links; i++) {
|
||||
var type = response.getElementsByTagName("type")[i].firstChild.data;
|
||||
var href = response.getElementsByTagName("href")[i].firstChild.data;
|
||||
var name = response.getElementsByTagName("name")[i].firstChild.data;
|
||||
var attr = response.getElementsByTagName("attr")[i].firstChild.data;
|
||||
span.appendChild(document.createElement("br"));
|
||||
var anchor = document.createElement("a");
|
||||
var hrefattr = document.createAttribute("href");
|
||||
hrefattr.nodeValue = href;
|
||||
anchor.setAttributeNode(hrefattr);
|
||||
anchor.appendChild(document.createTextNode(name));
|
||||
span.appendChild(anchor);
|
||||
}
|
||||
}
|
||||
|
||||
// add remaining string
|
||||
if (snippetText != "") {
|
||||
span.appendChild(document.createTextNode(snippetText));
|
||||
}
|
||||
}
|
||||
|
||||
function handleAudioState(req) {
|
||||
function handleMediaState(req) {
|
||||
if(req.readyState != 4){
|
||||
return;
|
||||
}
|
||||
|
||||
var response = req.responseXML;
|
||||
var urlHash = response.getElementsByTagName("urlHash")[0].firstChild.data;
|
||||
var links = response.getElementsByTagName("links")[0].firstChild.data;
|
||||
var span = document.getElementById(urlHash)
|
||||
removeAllChildren(span);
|
||||
|
||||
var snippetText = "";
|
||||
if (links > 0) {
|
||||
span.className = "snippetLoaded";
|
||||
for (i = 0; i < links; i++) {
|
||||
|
@ -117,12 +100,39 @@ function handleAudioState(req) {
|
|||
var href = response.getElementsByTagName("href")[i].firstChild.data;
|
||||
var name = response.getElementsByTagName("name")[i].firstChild.data;
|
||||
var attr = response.getElementsByTagName("attr")[i].firstChild.data;
|
||||
|
||||
var nameanchor = document.createElement("a");
|
||||
nameanchor.setAttribute("href", href);
|
||||
nameanchor.appendChild(document.createTextNode(name));
|
||||
|
||||
var linkanchor = document.createElement("a");
|
||||
linkanchor.setAttribute("href", href);
|
||||
linkanchor.appendChild(document.createTextNode(href));
|
||||
|
||||
var col1 = document.createElement("td");
|
||||
var width1 = document.createAttribute("width");
|
||||
width1.nodeValue = 200;
|
||||
col1.setAttributeNode(width1);
|
||||
col1.appendChild(nameanchor);
|
||||
var col2 = document.createElement("td");
|
||||
var width2 = document.createAttribute("width");
|
||||
width2.nodeValue = 500;
|
||||
col2.setAttributeNode(width2);
|
||||
col2.appendChild(linkanchor);
|
||||
|
||||
var row = document.createElement("tr");
|
||||
row.setAttribute("class", "TableCellDark");
|
||||
row.appendChild(col1);
|
||||
row.appendChild(col2);
|
||||
|
||||
var table = document.createElement("table");
|
||||
table.appendChild(row);
|
||||
span.appendChild(table);
|
||||
}
|
||||
} else {
|
||||
span.className = "snippetError";
|
||||
span.appendChild(document.createTextNode(""));
|
||||
}
|
||||
|
||||
span.appendChild(document.createTextNode(snippetText));
|
||||
}
|
||||
|
||||
function addHover() {
|
||||
|
|
|
@ -64,7 +64,7 @@ public class snippet {
|
|||
|
||||
|
||||
// attach link information
|
||||
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, false, 1000);
|
||||
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000);
|
||||
plasmaSnippetCache.MediaSnippet ms;
|
||||
for (int i = 0; i < mediaSnippets.size(); i++) {
|
||||
ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i);
|
||||
|
@ -73,7 +73,7 @@ public class snippet {
|
|||
prop.put("link_" + i + "_name", ms.name);
|
||||
prop.put("link_" + i + "_attr", ms.attr);
|
||||
}
|
||||
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS");
|
||||
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
|
||||
prop.put("link", mediaSnippets.size());
|
||||
prop.put("links", mediaSnippets.size());
|
||||
|
||||
|
|
|
@ -56,6 +56,7 @@ document.searchform.Enter.value = "search again - catch up more links";
|
|||
</script>
|
||||
|
||||
#(type)#
|
||||
<!-- type 0: text search -->
|
||||
#(excluded)#
|
||||
::
|
||||
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
|
||||
|
@ -144,7 +145,50 @@ You can enrich the search results by using the 'global' option; you must also sw
|
|||
(by using the proxy) to contribute to the global index.
|
||||
#(/resultbottomline)#
|
||||
</p>
|
||||
::<!-- type 1: media search -->
|
||||
#(excluded)#
|
||||
::
|
||||
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
|
||||
#(/excluded)#
|
||||
#(num-results)#
|
||||
::
|
||||
<p>No Results.</p>
|
||||
::
|
||||
<p>No Results. (length of search words must be at least 3 characters)</p>
|
||||
::
|
||||
<p>No Results. If you think this is unsatisfactory then you may consider to support the global index by running your own proxy/peer.
|
||||
If everybody contributes, the results will get better.</p>
|
||||
<p>Other possible reasons for no result:<p>
|
||||
<ul>
|
||||
<li>The search time was too short. Search again with same query to catch up 'late peers'</li>
|
||||
<li>There is currently no support for german umlaute. Please use ae/oe/ue instead</li>
|
||||
<li>Words of length < 3 are not indexed. Please omit such words</li>
|
||||
<li>YaCy tries to index singular instead of plural words. Please use the singular form</li>
|
||||
<li>Only complete words are indexed, not parts of words</li>
|
||||
<li>Don't use stopwords as search words</li>
|
||||
<li>During this test phase the reaction time of remote peers is unknown.
|
||||
Please repeat your search to see if there are late-responses from remote peers</li>
|
||||
</ul>
|
||||
<p>If you think the information you searched should exist in the global index,
|
||||
then please run your own peer and start a crawl of your wanted information to make it
|
||||
available for everyone. Then stay online to support crawls from other peers. Thank you!</p>
|
||||
::
|
||||
<p><strong>#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links of a total number of <strong>#[totalcount]#</strong> known.</p>
|
||||
#(/num-results)#
|
||||
<!-- linklist begin -->
|
||||
#{results}#
|
||||
<!-- link begin -->
|
||||
<span class="snippetLoading" id="#[urlhash]#">loading snippet from <a href="#[url]#" id="url#[urlhash]#">#[urlname]#</a><br /></span>
|
||||
<!-- link end -->
|
||||
#{/results}#
|
||||
<script type="text/javascript">
|
||||
AllMediaSnippets();
|
||||
addHover();
|
||||
</script>
|
||||
|
||||
<!-- linklist end -->
|
||||
::<!-- type 2: image search: presents image thumbnails -->
|
||||
::<!-- type 3: image thumbnail list for one single url -->
|
||||
<table border="0" cellspacing="16" cellpadding="0">
|
||||
#{results}#
|
||||
<tr valign="bottom">
|
||||
|
@ -157,39 +201,13 @@ You can enrich the search results by using the 'global' option; you must also sw
|
|||
#{/results}#
|
||||
</table>
|
||||
#(/type)#
|
||||
<p class="info">YaCy is a GPL'ed project
|
||||
with the target of implementing a P2P-based global search engine.<br />
|
||||
Architecture (C) by Michael Peter Christen,
|
||||
<img src="/env/grafics/mcemailh.gif" alt="Mail-Adresse von Michael Peter Christen" /></p>
|
||||
<p class="info">
|
||||
YaCy is a GPL'ed project with the target of implementing a P2P-based global search engine.<br />
|
||||
Architecture (C) by Michael Peter Christen, <img src="/env/grafics/mcemailh.gif" alt="Mail-Adresse von Michael Peter Christen" />
|
||||
</p>
|
||||
#(display)#
|
||||
#%env/templates/simplefooter.template%#
|
||||
::
|
||||
<p><strong>Refine your search with these topwords</strong>:</p>
|
||||
<p>
|
||||
#{words}#
|
||||
<a href="yacysearch.html?search=#[newsearch]#&Enter=Search&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#">#[word]#</a>
|
||||
#{/words}#
|
||||
</p>
|
||||
#(/combine)#
|
||||
<!-- linklist begin -->
|
||||
|
||||
#{results}#
|
||||
<!-- link begin -->
|
||||
<div class="searchresults">
|
||||
<p class="snippet"><span class="#(snippet)#snippetLoading::snippetLoaded#(/snippet)#" id="#[urlhash]#">#(snippet)#loading snippet ...::#[text]##(/snippet)#</span></p>
|
||||
</div>
|
||||
<!-- link end -->
|
||||
#{/results}#
|
||||
|
||||
<script type="text/javascript">
|
||||
AllTextSnippets();
|
||||
addHover();
|
||||
</script>
|
||||
|
||||
<!-- linklist end -->
|
||||
<p>
|
||||
#(resultbottomline)#
|
||||
::
|
||||
#%env/templates/footer.template%#
|
||||
#(/display)#
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ public class yacysearch {
|
|||
}
|
||||
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
|
||||
|
||||
final int count = Integer.parseInt(post.get("count", "10"));
|
||||
int count = Integer.parseInt(post.get("count", "10"));
|
||||
final String order = post.get("order", plasmaSearchPreOrder.canUseYBR() ? "YBR-Date-Quality" : "Date-Quality-YBR");
|
||||
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
|
||||
final boolean indexof = post.get("indexof","").equals("on");
|
||||
|
@ -178,6 +178,9 @@ public class yacysearch {
|
|||
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
|
||||
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
|
||||
|
||||
// patch until better search profiles are available
|
||||
if ((contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 50;
|
||||
|
||||
serverObjects prop = new serverObjects();
|
||||
if (post.get("cat", "href").equals("href")) {
|
||||
|
||||
|
@ -368,7 +371,7 @@ public class yacysearch {
|
|||
}
|
||||
}
|
||||
|
||||
prop.put("type", "0");
|
||||
prop.put("type", (thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : 1);
|
||||
prop.put("cat", "href");
|
||||
prop.put("depth", "0");
|
||||
|
||||
|
@ -406,7 +409,7 @@ public class yacysearch {
|
|||
}
|
||||
prop.put("type_results", line);
|
||||
|
||||
prop.put("type", 1); // set type of result: image list
|
||||
prop.put("type", 3); // set type of result: image list
|
||||
prop.put("cat", "href");
|
||||
prop.put("depth", depth);
|
||||
}
|
||||
|
|
|
@ -172,8 +172,8 @@ public final class plasmaParser {
|
|||
* @see #initMediaExt(String)
|
||||
*/
|
||||
static {
|
||||
String apps = "sit,hqx,img,dmg,exe,com,bat,sh";
|
||||
String audio = "mp2,mp3,ogg,aac,aif,aiff,wav";
|
||||
String apps = "sit,hqx,img,dmg,exe,com,bat,sh,zip,jar";
|
||||
String audio = "mp2,mp3,ogg,aac,aif,aiff,wav,ogg";
|
||||
String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v";
|
||||
String image = "jpg,jpeg,jpe,gif,png";
|
||||
initMediaExt(extString2extList(
|
||||
|
|
|
@ -212,6 +212,8 @@ public class plasmaSnippetCache {
|
|||
this.href = href;
|
||||
this.name = name;
|
||||
this.attr = attr;
|
||||
if ((this.name == null) || (this.name.length() == 0)) this.name = "_";
|
||||
if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -623,12 +625,12 @@ public class plasmaSnippetCache {
|
|||
//result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
|
||||
s = removeAppearanceHashes(url, queryhashes);
|
||||
if (s.size() == 0) {
|
||||
result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
|
||||
result.add(new MediaSnippet(mediatype, url, desc, null));
|
||||
continue;
|
||||
}
|
||||
s = removeAppearanceHashes(desc, s);
|
||||
if (s.size() == 0) {
|
||||
result.add(new MediaSnippet(mediatype, url, (desc.length() == 0) ? url : desc, null));
|
||||
result.add(new MediaSnippet(mediatype, url, desc, null));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -651,12 +653,12 @@ public class plasmaSnippetCache {
|
|||
//result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
|
||||
s = removeAppearanceHashes(url, queryhashes);
|
||||
if (s.size() == 0) {
|
||||
result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
|
||||
result.add(new MediaSnippet("image", url, desc, ientry.width() + " x " + ientry.height()));
|
||||
continue;
|
||||
}
|
||||
s = removeAppearanceHashes(desc, s);
|
||||
if (s.size() == 0) {
|
||||
result.add(new MediaSnippet("image", url, (desc.length() == 0) ? url : desc, ientry.width() + " x " + ientry.height()));
|
||||
result.add(new MediaSnippet("image", url, desc, ientry.width() + " x " + ientry.height()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -188,7 +188,7 @@ parseableMimeTypes.URLREDIRECTOR=
|
|||
# a comma-separated list of extensions that denote media file formats
|
||||
# this is important to recognize <a href> - tags as not-html reference
|
||||
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
|
||||
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
|
||||
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
|
||||
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
|
||||
|
||||
# Promotion Strings
|
||||
|
|
Loading…
Reference in New Issue
Block a user