mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-21 00:00:13 +02:00
d6b82840f8
This uses an enhanced version of the Nutch/Solr TextProfileSignatue. As a result, a signature of the document is written to the solr search index. Additionally for each time when a signature is written, it is checked if the singature exists already in the index. If the signature does not exist, the document is marked as unique. The unique attribute can now be used to sort document lists and bring duplicates to the end of a result list. To enable this, a large portion of the search api to Solr had to be changed. This affected mainly caching of 'exists' searches to enhance the check for existing signatures and do this without actually doing a solr query. Because here the first time a long number is used as value in the Solr store, also the value naming in the YaCySchema had to be adopted and normalized. This caused that many files had to be changed.
147 lines
8.1 KiB
Java
147 lines
8.1 KiB
Java
// index.java
|
|
// (C) 2004-2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
|
// first published 2004 on http://www.anomic.de
|
|
//
|
|
// This is a part of YaCy, a peer-to-peer based web search engine
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// LICENSE
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
//
|
|
// You must compile this file with
|
|
// javac -classpath .:../classes index.java
|
|
// if the shell's current path is HTROOT
|
|
|
|
|
|
import net.yacy.cora.document.analysis.Classification;
|
|
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
import net.yacy.search.Switchboard;
|
|
import net.yacy.search.SwitchboardConstants;
|
|
import net.yacy.search.ranking.BlockRank;
|
|
import net.yacy.server.serverObjects;
|
|
import net.yacy.server.serverSwitch;
|
|
|
|
public class index {
|
|
|
|
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
|
final Switchboard sb = (Switchboard) env;
|
|
final serverObjects prop = new serverObjects();
|
|
BlockRank.ensureLoaded(); // lazy initialization of block rank tables
|
|
|
|
final String forwardTarget = sb.getConfig(SwitchboardConstants.INDEX_FORWARD, "");
|
|
if (forwardTarget.length() > 0) {
|
|
// forward the page
|
|
prop.put("forward", 1);
|
|
prop.put("forward_target", forwardTarget);
|
|
return prop;
|
|
}
|
|
|
|
// access control
|
|
final boolean authorizedAccess = sb.verifyAuthentication(header);
|
|
if ((post != null) && (post.containsKey("publicPage"))) {
|
|
if (!authorizedAccess) {
|
|
prop.authenticationRequired();
|
|
return prop;
|
|
}
|
|
}
|
|
|
|
if (authorizedAccess) {
|
|
sb.index.fulltext().commit(); // call this only as superuser to prevent that this can be misused for DoS
|
|
}
|
|
|
|
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
|
|
final boolean focus = (post == null) ? true : post.get("focus", "1").equals("1");
|
|
|
|
int searchoptions = (post == null) ? 0 : Math.min(1, post.getInt("searchoptions", 0));
|
|
if (!sb.getConfigBool("search.options", true)) searchoptions = 0;
|
|
final String former = (post == null) ? "" : post.get("former", "");
|
|
final int count = Math.min(100, (post == null) ? 10 : post.getInt("count", 10));
|
|
final int maximumRecords = sb.getConfigInt(SwitchboardConstants.SEARCH_ITEMS, 10);
|
|
final String prefermaskfilter = (post == null) ? "" : post.get("prefermaskfilter", "");
|
|
final String constraint = (post == null) ? "" : post.get("constraint", "");
|
|
final String cat = (post == null) ? "href" : post.get("cat", "href");
|
|
final int type = (post == null) ? 0 : post.getInt("type", 0);
|
|
|
|
//final boolean indexDistributeGranted = sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW, true);
|
|
final boolean indexReceiveGranted = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true) ||
|
|
sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true);
|
|
global = global && indexReceiveGranted;
|
|
|
|
// search domain
|
|
Classification.ContentDomain contentdom = ContentDomain.TEXT;
|
|
final String cds = (post == null) ? "text" : post.get("contentdom", "text");
|
|
if (cds.equals("text")) contentdom = ContentDomain.TEXT;
|
|
if (cds.equals("audio")) contentdom = ContentDomain.AUDIO;
|
|
if (cds.equals("video")) contentdom = ContentDomain.VIDEO;
|
|
if (cds.equals("image")) contentdom = ContentDomain.IMAGE;
|
|
if (cds.equals("app")) contentdom = ContentDomain.APP;
|
|
|
|
// we create empty entries for template strings
|
|
String promoteSearchPageGreeting = env.getConfig(SwitchboardConstants.GREETING, "");
|
|
if (env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) promoteSearchPageGreeting = env.getConfig("network.unit.description", "");
|
|
prop.put(SwitchboardConstants.GREETING, promoteSearchPageGreeting);
|
|
prop.put(SwitchboardConstants.GREETING_HOMEPAGE, sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
|
|
prop.put(SwitchboardConstants.GREETING_LARGE_IMAGE, sb.getConfig(SwitchboardConstants.GREETING_LARGE_IMAGE, ""));
|
|
prop.putHTML("former", former);
|
|
prop.put("num-results", "0");
|
|
prop.put("excluded", "0");
|
|
prop.put("combine", "0");
|
|
prop.put("resultbottomline", "0");
|
|
prop.put("searchoptions", searchoptions);
|
|
prop.put("searchoptions_maximumRecords", maximumRecords);
|
|
prop.put("searchoptions_count-10", (count == 10) ? "1" : "0");
|
|
prop.put("searchoptions_count-50", (count == 50) ? "1" : "0");
|
|
prop.put("searchoptions_count-100", (count == 100) ? "1" : "0");
|
|
prop.put("searchoptions_resource-select", (sb.peers == null || sb.peers.sizeConnected() == 0 || !global) ? 0 : 1);
|
|
prop.put("searchoptions_resource-select_global", global ? "1" : "0");
|
|
prop.put("searchoptions_resource-select_global-disabled", indexReceiveGranted ? "0" : "1");
|
|
prop.put("searchoptions_resource-select_local", global ? "0" : "1");
|
|
prop.put("searchoptions_prefermaskoptions", "0");
|
|
prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter);
|
|
prop.put("searchoptions_indexofChecked", "");
|
|
prop.put("results", "");
|
|
prop.putHTML("cat", cat);
|
|
prop.put("type", type);
|
|
prop.put("depth", "0");
|
|
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
|
|
prop.put("focus", focus ? 1 : 0);
|
|
prop.put("pi", sb.getConfigBool("publicAdministratorPi", false) ? 1 : 0);
|
|
prop.putHTML("constraint", constraint);
|
|
prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0);
|
|
prop.put("searchdomswitches_searchoptions", searchoptions);
|
|
prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0);
|
|
prop.put("searchdomswitches_searchaudio", sb.getConfigBool("search.audio", true) ? 1 : 0);
|
|
prop.put("searchdomswitches_searchvideo", sb.getConfigBool("search.video", true) ? 1 : 0);
|
|
prop.put("searchdomswitches_searchimage", sb.getConfigBool("search.image", true) ? 1 : 0);
|
|
prop.put("searchdomswitches_searchapp", sb.getConfigBool("search.app", true) ? 1 : 0);
|
|
prop.put("searchdomswitches_searchtext_check", (contentdom == ContentDomain.TEXT) ? "1" : "0");
|
|
prop.put("searchdomswitches_searchaudio_check", (contentdom == ContentDomain.AUDIO) ? "1" : "0");
|
|
prop.put("searchdomswitches_searchvideo_check", (contentdom == ContentDomain.VIDEO) ? "1" : "0");
|
|
prop.put("searchdomswitches_searchimage_check", (contentdom == ContentDomain.IMAGE) ? "1" : "0");
|
|
prop.put("searchdomswitches_searchapp_check", (contentdom == ContentDomain.APP) ? "1" : "0");
|
|
prop.put("search.navigation", sb.getConfig("search.navigation", "all") );
|
|
prop.put("search.verify", sb.getConfig("search.verify", "iffresh") );
|
|
// online caution timing
|
|
sb.localSearchLastAccess = System.currentTimeMillis();
|
|
|
|
return prop;
|
|
}
|
|
}
|