This commit is contained in:
sixcooler 2015-10-03 11:09:16 +02:00
commit 839d710105
12 changed files with 36 additions and 14 deletions

View File

@ -66,6 +66,7 @@ import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.Segment;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -76,8 +77,14 @@ public class Crawler_p {
// this servlet starts a web crawl. The interface for entering the web crawl parameters is in IndexCreate_p.html
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
// clean up all search events
SearchEventCache.cleanupEvents(true);
sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
// initial values for AJAX Elements (without JavaScript)
final serverObjects prop = new serverObjects();
prop.put("rejected", 0);

View File

@ -136,8 +136,11 @@ public class ViewImage {
// gif images are not loaded because of an animated gif bug within jvm which sends java into an endless loop with high CPU
if (ext.equals("gif") && "gif".equals(MultiProtocolURL.getFileExtension(url.getFileName()))) {
return new ByteArrayInputStream(resourceb);
} else if (ext.equals("svg") && "svg".equals(MultiProtocolURL.getFileExtension(url.getFileName()))) {
// svg images are not supported by awt, but by most browsers; deliver just the content (without crop/scale)
return new ByteArrayInputStream(resourceb);
}
// read image
image = ImageParser.parse(urlString, resourceb);
if (image == null) {

View File

@ -308,16 +308,15 @@ public class yacysearchitem {
// image search; shows thumbnails
prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content
SearchEvent.ImageResult image = null;
try {
image = theSearch.oneImageResult(item, timeout);
SearchEvent.ImageResult image = theSearch.oneImageResult(item, timeout);
final String imageUrlstring = image.imageUrl.toNormalform(true);
final String imageUrlExt = MultiProtocolURL.getFileExtension(image.imageUrl.getFileName());
final String target = sb.getConfig(imageUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
final String license = URLLicense.aquireLicense(image.imageUrl); // this is just the license key to get the image forwarded through the YaCy thumbnail viewer, not an actual lawful license
//sb.loader.loadIfNotExistBackground(image.imageUrl, 1024 * 1024 * 10, null, ClientIdentification.yacyIntranetCrawlerAgent);
prop.putHTML("content_item_hrefCache", "/ViewImage." + ("gif".equals(imageUrlExt) ? "gif" : "png") + "?maxwidth=128&maxheight=128&code="+license+"&isStatic=true&quadratic=&url=" + imageUrlstring);
prop.putHTML("content_item_hrefCache", "ViewImage." + ("gif.png.svg".contains(imageUrlExt) ? imageUrlExt : "png") + "?maxwidth=128&maxheight=128&code="+license+"&isStatic=true&quadratic=&url=" + imageUrlstring);
prop.putHTML("content_item_href", imageUrlstring);
prop.putHTML("content_item_target", target);
prop.put("content_item_code", license);

View File

@ -91,10 +91,10 @@ public class Classification {
final String apps = "7z,ace,arc,arj,apk,asf,asx,bat,bin,bkf,bz2,cab,com,css,dcm,deb,dll,dmg,exe,java,gho,ghs,gz,hqx,img,iso,jar,lha,rar,sh,sit,sitx,tar,tbz,tgz,tib,torrent,vbs,war,zip";
final String audio = "aac,aif,aiff,flac,m4a,m4p,mid,mp2,mp3,oga,ogg,ram,sid,wav,wma";
final String video = "3g2,3gp,3gp2,3gpp,3gpp2,3ivx,asf,asx,avi,div,divx,dv,dvx,env,f4v,flv,hdmov,m1v,m4v,m-jpeg,mkv,moov,mov,movie,mp2v,mp4,mpe,mpeg,mpg,mpg4,mv4,ogm,ogv,qt,rm,rv,vid,swf,webm,wmv";
final String image = "ai,bmp,cdr,cmx,emf,eps,gif,img,jpeg,jpg,mng,pct,pdd,pdn,pict,png,psb,psd,psp,tif,tiff,wmf";
final String image = "ai,bmp,cdr,cmx,emf,eps,gif,img,jpeg,jpg,mng,pct,pdd,pdn,pict,png,psb,psd,psp,svg,tif,tiff,wmf";
final String ctrl = "sha1,md5,crc32,sfv";
addSet(textExtSet, text); // image formats
addSet(textExtSet, text); // text formats
addSet(imageExtSet, image); // image formats
addSet(audioExtSet, audio); // audio formats
addSet(videoExtSet, video); // video formats

View File

@ -995,7 +995,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
// construct a String again
sb.setLength(0);
for (final String v: token) if (v.length() > 1) sb.append(v).append(' ');
for (final String v: token) if (v.length() >= 1) sb.append(v).append(' ');
return sb.length() == 0 ? "" : sb.substring(0, sb.length() - 1);
}

View File

@ -311,7 +311,6 @@ public class DCEntry extends MultiMapSolrParams {
public double getLat() {
String t = this.get("geo:lat");
if (t == null) t = this.get("geo:lat");
t = stripCDATA(t);
if (t == null) return 0.0d;
return Double.parseDouble(t);

View File

@ -169,7 +169,7 @@ public class metadataImageParser extends AbstractParser implements Parser {
singleList(title), // title
author == null ? "" : author, // author
location.getHost(), // Publisher
new String[]{}, // sections
null, // sections
descriptions, // description
gpslon, gpslat, // location
imgInfotxt.toString(), // content text

View File

@ -94,7 +94,7 @@ public class rssParser extends AbstractParser implements Parser {
singleList(item.getTitle()),
item.getAuthor(),
item.getCopyright(),
new String[0],
null,
item.getDescriptions(),
item.getLon(),
item.getLat(),

View File

@ -96,7 +96,7 @@ public class sitemapParser extends AbstractParser implements Parser {
singleList(""),
"",
"",
new String[0],
null,
new ArrayList<String>(),
0.0f, 0.0f,
null,

View File

@ -81,7 +81,6 @@ public class swfParser extends AbstractParser implements Parser {
String url = null;
String urlnr = null;
final String linebreak = System.getProperty("line.separator");
final String[] sections = null;
final List<String> abstrct = new ArrayList<String>();
//TreeSet images = null;
final List<AnchorURL> anchors = new ArrayList<AnchorURL>();
@ -100,7 +99,7 @@ public class swfParser extends AbstractParser implements Parser {
while ((urlStart = contents.indexOf("http://",urlEnd)) >= 0){
urlEnd = contents.indexOf(linebreak,urlStart);
url = contents.substring(urlStart,urlEnd);
urlnr = Integer.toString(++urls).toString();
urlnr = Integer.toString(++urls);
AnchorURL u = new AnchorURL(url);
u.setNameProperty(urlnr);
anchors.add(u);
@ -122,7 +121,7 @@ public class swfParser extends AbstractParser implements Parser {
replaceAll("\t"," ")), // title
"", // TODO: AUTHOR
"",
sections, // an array of section headlines
null, // an array of section headlines
abstrct, // an abstract
0.0f, 0.0f,
contents, // the parsed document text

View File

@ -2492,6 +2492,10 @@ public final class Switchboard extends serverSwitch {
}
}
if (allCrawlsFinished) {
// refresh the search cache
SearchEventCache.cleanupEvents(true);
sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
if (postprocessing) {
// run postprocessing on all profiles
ReferenceReportCache rrCache = index.getReferenceReportCache();

View File

@ -184,6 +184,17 @@ public class QueryGoal {
}
}
}
// in case the include_string contains several entries including 1-char tokens and also more-than-1-char tokens,
// then remove the 1-char tokens to prevent us from being too strict. This will make it possible to be a bit more fuzzy
// in the search where it is appropriate
boolean contains_single = false, contains_multiple = false;
for (String token: include_string) {
if (token.length() == 1) contains_single = true; else contains_multiple = true;
}
if (contains_single && contains_multiple) {
Iterator<String> i = include_string.iterator();
while (i.hasNext()) if (i.next().length() == 1) i.remove();
}
}
/**