- modified zone navigation (still does not work correctly)

- added dht switch in network definition
- 0.574

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4550 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-03-11 11:09:38 +00:00
parent 8d0470a5c6
commit b4ed937f1e
13 changed files with 94 additions and 105 deletions

View File

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.573
releaseVersion=0.574
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

View File

@ -54,6 +54,7 @@ import de.anomic.tools.crypt;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNetwork;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public final class search {
@ -153,7 +154,7 @@ public final class search {
plasmaSearchEvent theSearch = null;
if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts
theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, client);
theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
@ -178,7 +179,7 @@ public final class search {
} else {
// retrieve index containers from search request
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, client);
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");

View File

@ -182,6 +182,7 @@ public class result {
20,
constraint,
true,
yacyURL.TLD_any_zone_filter,
client);

View File

@ -26,9 +26,30 @@
<p><strong>Language Zone</strong>:
<select onchange="window.location.href=this.options[this.selectedIndex].value">
<option selected="selected">-select-</option>
#{zones}#
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=#[zonecode]#">#[zone]#</option>
#{/zones}#
#(All)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=255">all languages, all regions (#[count]#)</option>
#(/All)#
#(EuropeRussia)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=1">european languages but no english (#[count]#)</option>
#(/EuropeRussia)#
#(NorthAmericaOceania)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=80">english and generic domains (#[count]#)</option>
#(/NorthAmericaOceania)#
#(MiddleSouthAmerica)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=2">spanish-speaking countries (#[count]#)</option>
#(/MiddleSouthAmerica)#
#(SouthEastAsia)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=4">from asia (#[count]#)</option>
#(/SouthEastAsia)#
#(MiddleEastWestAsia)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=8">from the middle east (#[count]#)</option>
#(/MiddleEastWestAsia)#
#(Africa)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=32">from Africa (#[count]#)</option>
#(/Africa)#
#(Intranet)#::
<option value="/yacy/user/ysearch.html?search=#[search]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;zone=128">intranet content (#[count]#)</option>
#(/Intranet)#
</select>
</p>
#(/languagezone)#

View File

@ -25,7 +25,6 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@ -130,24 +129,24 @@ public class sidebar_navigation {
}
// compose language zone drill-down
int c = 0;
final Iterator<Map.Entry<String, Integer>> iter = theSearch.getRankingResult().getZoneStatistics().entrySet().iterator();
Map.Entry<String, Integer> entry;
while (iter.hasNext()) {
entry = iter.next();
if ((theQuery == null) || (theQuery.queryString == null)) break;
prop.putHTML("navigation_languagezone_zones_" + c + "_zone", entry.getKey() + " (" + entry.getValue() + ")");
prop.putHTML("navigation_languagezone_zones_" + c + "_search", theQuery.queryString.replace(' ', '+'));
prop.put("navigation_languagezone_zones_" + c + "_count", theQuery.displayResults());
prop.put("navigation_languagezone_zones_" + c + "_offset", "0");
prop.put("navigation_languagezone_zones_" + c + "_contentdom", theQuery.contentdom());
prop.put("navigation_languagezone_zones_" + c + "_resource", theQuery.searchdom());
prop.put("navigation_languagezone_zones_" + c + "_zonecode", yacyURL.zone2map.get(entry.getKey()).intValue());
prop.put("navigation_languagezone_zones", c);
c++;
}
prop.put("navigation_languagezone", (c > 2) ? "1" : "0");
// Compose the language zone drill-down: one template section per zone that
// has at least one hit. The zone name handed to domzone() must match the
// name of the corresponding #(...)# section in the navigation template
// exactly, otherwise that section is never switched on.
final int[] zones = theSearch.getRankingResult().zones();
boolean z = false; // becomes true as soon as any specific zone has hits
// the "All" entry is always filled, but does not enable the selector by itself
domzone(prop, "All", theSearch.getRankingResult().size(), theQuery);
if (zones[yacyURL.TLD_EuropeRussia_ID] > 0) {
    z = true;
    domzone(prop, "EuropeRussia", zones[yacyURL.TLD_EuropeRussia_ID], theQuery);
}
if (zones[yacyURL.TLD_MiddleSouthAmerica_ID] > 0) {
    z = true;
    domzone(prop, "MiddleSouthAmerica", zones[yacyURL.TLD_MiddleSouthAmerica_ID], theQuery);
}
if (zones[yacyURL.TLD_SouthEastAsia_ID] > 0) {
    z = true;
    domzone(prop, "SouthEastAsia", zones[yacyURL.TLD_SouthEastAsia_ID], theQuery);
}
if (zones[yacyURL.TLD_MiddleEastWestAsia_ID] > 0) {
    z = true;
    // fixed: the name carried a trailing underscore and never matched the template section
    domzone(prop, "MiddleEastWestAsia", zones[yacyURL.TLD_MiddleEastWestAsia_ID], theQuery);
}
// english-speaking countries and generic domains are presented as one entry
if (zones[yacyURL.TLD_NorthAmericaOceania_ID] + zones[yacyURL.TLD_Generic_ID] > 0) {
    z = true;
    domzone(prop, "NorthAmericaOceania", zones[yacyURL.TLD_NorthAmericaOceania_ID] + zones[yacyURL.TLD_Generic_ID], theQuery);
}
if (zones[yacyURL.TLD_Africa_ID] > 0) {
    z = true;
    domzone(prop, "Africa", zones[yacyURL.TLD_Africa_ID], theQuery);
}
// index 7 has no named constant; yacyURL notes that id 7 flags local addresses
if (zones[7] > 0) {
    z = true;
    domzone(prop, "Intranet", zones[7], theQuery);
}
prop.put("navigation_languagezone", (z) ? "1" : "0");
// compose page navigation
StringBuffer resnav = new StringBuffer();
@ -191,4 +190,13 @@ public class sidebar_navigation {
"&amp;former=" + theQuery.queryString() + "\">";
}
/**
 * Fills the template properties of one language zone entry and switches
 * that entry on.
 *
 * All keys are written below "navigation_languagezone_" + zonename. If no
 * query (or no query string) is available, nothing is written and the
 * entry stays switched off; the previous inline loop applied the same
 * guard before building a zone link.
 *
 * @param prop      the property container handed to the template
 * @param zonename  name of the zone section in the template
 * @param zonecount number of hits counted for this zone
 * @param theQuery  the current search query; may be null
 */
private static void domzone(serverObjects prop, String zonename, int zonecount, plasmaSearchQuery theQuery) {
    // without a query string no drill-down link can be composed
    if ((theQuery == null) || (theQuery.queryString == null)) return;
    final String key = "navigation_languagezone_" + zonename;
    prop.put(key + "_count", zonecount);
    prop.putHTML(key + "_search", theQuery.queryString.replace(' ', '+'));
    prop.put(key + "_offset", "0");
    prop.put(key + "_contentdom", theQuery.contentdom());
    prop.put(key + "_resource", theQuery.searchdom());
    // selects alternative 1 of the zone's #(...)# section, i.e. shows the option
    prop.put(key, 1);
}
}

View File

@ -123,6 +123,8 @@ public class ysearch {
constraint.set(plasmaCondenser.flag_cat_indexof, true);
}
int domainzone = post.getInt("zone", yacyURL.TLD_any_zone_filter);
// SEARCH
//final boolean indexDistributeGranted = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true");
//final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
@ -182,6 +184,7 @@ public class ysearch {
20,
constraint,
true,
domainzone,
client);

View File

@ -257,6 +257,7 @@ public class yacysearch {
20,
constraint,
true,
yacyURL.TLD_any_zone_filter,
client);

View File

@ -118,7 +118,7 @@ public final class plasmaSearchQuery {
this.offset = 0;
this.urlMask = ".*";
this.domType = SEARCHDOM_LOCAL;
this.zonecode = yacyURL.language_domain_any_zone;
this.zonecode = yacyURL.TLD_any_zone_filter;
this.domMaxTargets = 0;
this.constraint = constraint;
this.allofconstraint = false;
@ -136,6 +136,7 @@ public final class plasmaSearchQuery {
int lines, int offset, String urlMask,
int domType, String domGroupName, int domMaxTargets,
kelondroBitfield constraint, boolean allofconstraint,
int domainzone,
String host) {
this.queryString = queryString;
this.queryHashes = queryHashes;
@ -149,7 +150,7 @@ public final class plasmaSearchQuery {
//this.maximumTime = Math.min(6000, maximumTime);
this.urlMask = urlMask;
this.domType = domType;
this.zonecode = yacyURL.language_domain_any_zone;
this.zonecode = domainzone;
this.domMaxTargets = domMaxTargets;
this.constraint = constraint;
this.allofconstraint = allofconstraint;
@ -288,9 +289,9 @@ public final class plasmaSearchQuery {
public String id(boolean anonymized) {
// generate a string that identifies a search so results can be re-used in a cache
if (anonymized) {
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + ":" + this.contentdom + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString());
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString();
} else {
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + ":" + this.contentdom + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString());
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString();
}
}

View File

@ -99,6 +99,10 @@ public final class plasmaSearchRankingProcess {
return order.cardinal(word);
}
/**
 * Returns the zone array kept in the domZones field.
 *
 * @return the domZones array itself, not a copy
 */
public int[] zones() {
    return domZones;
}
public void execQuery() {
long timer = System.currentTimeMillis();
@ -334,10 +338,6 @@ public final class plasmaSearchRankingProcess {
return this.local_resourceSize;
}
public Map<String, Integer> getZoneStatistics() {
return yacyURL.zoneStatistics(this.domZones);
}
public indexRWIEntry remove(String urlHash) {
kelondroSortStack<indexRWIVarEntry>.stackElement se = stack.remove(urlHash.hashCode());
if (se == null) return null;

View File

@ -2658,8 +2658,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (yacyCore.seedDB.noDHTActivity()) {
return "no DHT distribution: network too small";
}
if (!this.getConfigBool("network.unit.dht", true)) {
return "no DHT distribution: disabled by network.unit.dht";
}
if (getConfig(INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false")) {
return "no DHT distribution: not enabled";
return "no DHT distribution: not enabled (ser setting)";
}
if (wordIndex.loadedURL.size() < 10) {
return "no DHT distribution: loadedURL.size() = " + wordIndex.loadedURL.size();
@ -2673,7 +2676,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ((getConfig(INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (sbQueue.size() > 1)) {
return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + sbQueue.size();
}
return null;
return null; // this means; yes, please do dht transfer
}
public boolean dhtTransferJob() {

View File

@ -90,7 +90,7 @@ public final class yacySeedDB {
* these hashes all shall be generated by base64.enhancedCoder
*/
public static final int commonHashLength = 12;
public static final int dhtActivityMagic = 32;
public static final int dhtActivityMagic = 48;
public static final String[] sortFields = new String[] {yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.UPTIME, yacySeed.VERSION, yacySeed.LASTSEEN};
public static final String[] longaccFields = new String[] {yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.ISPEED};

View File

@ -31,7 +31,6 @@ import java.io.File;
import java.net.MalformedURLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -44,10 +43,18 @@ import de.anomic.tools.Punycode.PunycodeException;
public class yacyURL {
// TLD separation in political and cultural parts
// https://www.cia.gov/cia/publications/factbook/index.html
// http://en.wikipedia.org/wiki/List_of_countries_by_continent
// numeric ids of the TLD zones; the static initializer passes them to insertTLDProps
public static final int TLD_EuropeRussia_ID = 0; // European languages but no English
public static final int TLD_MiddleSouthAmerica_ID = 1; // mainly Spanish-speaking countries
public static final int TLD_SouthEastAsia_ID = 2; // Asia
public static final int TLD_MiddleEastWestAsia_ID = 3; // Middle East
public static final int TLD_NorthAmericaOceania_ID = 4; // English-speaking countries
public static final int TLD_Africa_ID = 5; // Africa
public static final int TLD_Generic_ID = 6; // anything else, mixed languages, mainly English
public static final int TLD_any_zone_filter = 255; // catch-all zone filter: search results are not restricted to any TLD zone; NOTE(review): filter values appear to be bit masks with bit n standing for zone id n - confirm
private static final String[] TLD_NorthAmericaOceania={
// primary english-speaking countries
@ -160,6 +167,7 @@ public class yacyURL {
"DK=Denmark",
"ES=Spain",
"EE=Estonia",
"EU=Europe",
"FI=Finland",
"FO=Faroe Islands", // Viking Settlers
"FR=France",
@ -334,27 +342,6 @@ public class yacyURL {
"NT=Neutral Zone"
};
/*
* TLDs: aero, biz, com, coop, edu, gov, info, int, mil, museum, name, net,
* org, pro, arpa AC, AD, AE, AERO, AF, AG, AI, AL, AM, AN, AO, AQ, AR,
* ARPA, AS, AT, AU, AW, AZ, BA, BB, BD, BE, BF, BG, BH, BI, BIZ, BJ, BM,
* BN, BO, BR, BS, BT, BV, BW, BY, BZ, CA, CC, CD, CF, CG, CH, CI, CK, CL,
* CM, CN, CO, COM, COOP, CR, CU, CV, CX, CY, CZ, DE, DJ, DK, DM, DO, DZ,
* EC, EDU, EE, EG, ER, ES, ET, EU, FI, FJ, FK, FM, FO, FR, GA, GB, GD, GE,
* GF, GG, GH, GI, GL, GM, GN, GOV, GP, GQ, GR, GS, GT, GU, GW, GY, HK, HM,
* HN, HR, HT, HU, ID, IE, IL, IM, IN, INFO, INT, IO, IQ, IR, IS, IT, JE,
* JM, JO, JOBS, JP, KE, KG, KH, KI, KM, KN, KR, KW, KY, KZ, LA, LB, LC, LI,
* LK, LR, LS, LT, LU, LV, LY, MA, MC, MD, MG, MH, MIL, MK, ML, MM, MN, MO,
* MOBI, MP, MQ, MR, MS, MT, MU, MUSEUM, MV, MW, MX, MY, MZ, NA, NAME, NC,
* NE, NET, NF, NG, NI, NL, NO, NP, NR, NU, NZ, OM, ORG, PA, PE, PF, PG, PH,
* PK, PL, PM, PN, PR, PRO, PS, PT, PW, PY, QA, RE, RO, RU, RW, SA, SB, SC,
* SD, SE, SG, SH, SI, SJ, SK, SL, SM, SN, SO, SR, ST, SU, SV, SY, SZ, TC,
* TD, TF, TG, TH, TJ, TK, TL, TM, TN, TO, TP, TR, TRAVEL, TT, TV, TW, TZ,
* UA, UG, UK, UM, US, UY, UZ, VA, VC, VE, VG, VI, VN, VU, WF, WS, YE, YT,
* YU, ZA, ZM, ZW
*/
public static String dummyHash;
private static HashMap<String, Integer> TLDID = new HashMap<String, Integer>();
@ -375,38 +362,21 @@ public class yacyURL {
}
}
public static final int language_domain_europe_zone = 128 + 1; //{0, 7};
public static final int language_domain_english_zone = 128 + 16 + 64; //{4, 6, 7};
public static final int language_domain_spanish_zone = 128 + 2; //{1, 7};
public static final int language_domain_asia_zone = 128 + 4; //{2, 7};
public static final int language_domain_middleeast_zone = 128 + 8; //{3, 7};
public static final int language_domain_africa_zone = 128 + 32; //{5, 7};
public static final int language_domain_any_zone = 255;
public static final HashMap<String, Integer> zone2map = new HashMap<String, Integer>();
static {
// create a dummy hash
dummyHash = "";
for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-";
// assign TLD-ids and names
insertTLDProps(TLD_EuropeRussia, 0); // European languages but no english
insertTLDProps(TLD_MiddleSouthAmerica, 1); // mainly spanish-speaking countries
insertTLDProps(TLD_SouthEastAsia, 2); // asia
insertTLDProps(TLD_MiddleEastWestAsia, 3); // middle east
insertTLDProps(TLD_NorthAmericaOceania, 4); // english-speaking countries
insertTLDProps(TLD_Africa, 5); // africa
insertTLDProps(TLD_Generic, 6); // anything else, mixed languages, mainly english
insertTLDProps(TLD_EuropeRussia, TLD_EuropeRussia_ID);
insertTLDProps(TLD_MiddleSouthAmerica, TLD_MiddleSouthAmerica_ID);
insertTLDProps(TLD_SouthEastAsia, TLD_SouthEastAsia_ID);
insertTLDProps(TLD_MiddleEastWestAsia, TLD_MiddleEastWestAsia_ID);
insertTLDProps(TLD_NorthAmericaOceania, TLD_NorthAmericaOceania_ID);
insertTLDProps(TLD_Africa, TLD_Africa_ID);
insertTLDProps(TLD_Generic, TLD_Generic_ID);
// the id=7 is used to flag local addresses
zone2map.put("europe", language_domain_europe_zone);
zone2map.put("english", language_domain_english_zone);
zone2map.put("spanish", language_domain_spanish_zone);
zone2map.put("asia", language_domain_asia_zone);
zone2map.put("middleeast", language_domain_middleeast_zone);
zone2map.put("africa", language_domain_africa_zone);
zone2map.put("any", language_domain_any_zone);
}
// class variables
@ -1132,27 +1102,6 @@ public class yacyURL {
return language;
}
public static Map<String, Integer> zoneStatistics(int[] domAccumulators) {
assert domAccumulators.length == 8;
HashMap<String, Integer> zoneCounter = new HashMap<String, Integer>();
Iterator<Map.Entry<String, Integer>> j;
Map.Entry<String, Integer> entry;
for (int i = 0; i < 8; i++) {
j = zone2map.entrySet().iterator();
while (j.hasNext()) {
entry = j.next();
if ((i & entry.getValue().intValue()) != 0) {
if (zoneCounter.containsKey(entry.getKey())) {
zoneCounter.put(entry.getKey(), zoneCounter.get(entry.getKey()) + domAccumulators[i]);
} else {
zoneCounter.put(entry.getKey(), domAccumulators[i]);
}
}
}
}
return zoneCounter;
}
public static void main(String[] args) {
String[][] test = new String[][]{
new String[]{null, "http://www.anomic.de/home/test?x=1#home"},

View File

@ -53,6 +53,7 @@ network.unit.name = freeworld
network.unit.description = Public YaCy Community
network.unit.domain = global
network.unit.search.time = 4
network.unit.dht = true
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 3
network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt