mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Added web structure analysis for a specific domain, which can be requested through the API.
Example: http://localhost:8080/api/webstructure.xml?about=www.yacy.net returns an XML document with the following content: <?xml version="1.0"?> <webstructure> <domains reference="reverse" count="1" maxref="300"> <domain host="www.yacy.net" id="FXg39Q" date="20090401"> <citation host="java.sun.com" id="o-R3yY" count="1" /> <citation host="yacy-suche.de" id="-KCLaB" count="1" /> <citation host="suma-ev.de" id="VRAHIA" count="1" /> <citation host="www.kit.edu" id="EMaLDQ" count="1" /> <citation host="yacy.net" id="Fh1hyQ" count="1" /> <citation host="www.fzk.de" id="V2Kl-A" count="1" /> <citation host="en.wikipedia.org" id="rwtdfR" count="3" /> <citation host="vimeo.com" id="MmdQDY" count="3" /> <citation host="liebel.fzk.de" id="sX4ozA" count="6" /> </domain> </domains> </webstructure> git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5766 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
b6c2167143
commit
bd409fb7ba
|
@ -27,6 +27,7 @@
|
|||
import java.net.MalformedURLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -124,7 +125,8 @@ public class WebStructurePicture_p {
|
|||
if (nextlayer == maxlayer) return mynodes;
|
||||
nextlayer++;
|
||||
final double radius = 1.0 / (1 << nextlayer);
|
||||
final Map<String, Integer> next = structure.references(centerhash);
|
||||
plasmaWebStructure.structureEntry sr = structure.references(centerhash);
|
||||
final Map<String, Integer> next = (sr == null) ? new HashMap<String, Integer>() : sr.references;
|
||||
Map.Entry<String, Integer> entry;
|
||||
String targethash, targethost;
|
||||
// first set points to next hosts
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -30,6 +31,7 @@ import de.anomic.plasma.plasmaSwitchboard;
|
|||
import de.anomic.plasma.plasmaWebStructure;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
|
||||
public class webstructure {
|
||||
|
||||
|
@ -37,24 +39,60 @@ public class webstructure {
|
|||
final serverObjects prop = new serverObjects();
|
||||
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
|
||||
final boolean latest = ((post == null) ? false : post.containsKey("latest"));
|
||||
String about = ((post == null) ? null : post.get("about", null));
|
||||
|
||||
if (about != null) {
|
||||
yacyURL url = null;
|
||||
if (about.length() > 6) {
|
||||
try {
|
||||
url = new yacyURL(about, null);
|
||||
about = url.hash().substring(6);
|
||||
} catch (MalformedURLException e) {
|
||||
about = null;
|
||||
}
|
||||
}
|
||||
if (about != null) {
|
||||
plasmaWebStructure.structureEntry sentry = sb.webStructure.references(about);
|
||||
if (sentry != null) {
|
||||
reference(prop, 0, sentry, sb.webStructure);
|
||||
prop.put("domains", 1);
|
||||
} else {
|
||||
prop.put("domains", 0);
|
||||
}
|
||||
} else {
|
||||
prop.put("domains", 0);
|
||||
}
|
||||
} else {
|
||||
final Iterator<plasmaWebStructure.structureEntry> i = sb.webStructure.structureEntryIterator(latest);
|
||||
int c = 0, d;
|
||||
int c = 0;
|
||||
plasmaWebStructure.structureEntry sentry;
|
||||
Map.Entry<String, Integer> refentry;
|
||||
String refdom, refhash;
|
||||
Integer refcount;
|
||||
Iterator<Map.Entry<String, Integer>> k;
|
||||
while (i.hasNext()) {
|
||||
sentry = i.next();
|
||||
reference(prop, c, sentry, sb.webStructure);
|
||||
c++;
|
||||
}
|
||||
prop.put("domains", c);
|
||||
if (latest) sb.webStructure.joinOldNew();
|
||||
}
|
||||
prop.put("maxref", plasmaWebStructure.maxref);
|
||||
|
||||
// return rewrite properties
|
||||
return prop;
|
||||
}
|
||||
|
||||
public static void reference(serverObjects prop, int c, plasmaWebStructure.structureEntry sentry, plasmaWebStructure ws) {
|
||||
prop.put("domains_" + c + "_hash", sentry.domhash);
|
||||
prop.put("domains_" + c + "_domain", sentry.domain);
|
||||
prop.put("domains_" + c + "_date", sentry.date);
|
||||
k = sentry.references.entrySet().iterator();
|
||||
d = 0;
|
||||
Iterator<Map.Entry<String, Integer>> k = sentry.references.entrySet().iterator();
|
||||
Map.Entry<String, Integer> refentry;
|
||||
String refdom, refhash;
|
||||
Integer refcount;
|
||||
int d = 0;
|
||||
refloop: while (k.hasNext()) {
|
||||
refentry = k.next();
|
||||
refhash = refentry.getKey();
|
||||
refdom = sb.webStructure.resolveDomHash2DomString(refhash);
|
||||
refdom = ws.resolveDomHash2DomString(refhash);
|
||||
if (refdom == null) continue refloop;
|
||||
prop.put("domains_" + c + "_citations_" + d + "_refhash", refhash);
|
||||
prop.put("domains_" + c + "_citations_" + d + "_refdom", refdom);
|
||||
|
@ -63,13 +101,5 @@ public class webstructure {
|
|||
d++;
|
||||
}
|
||||
prop.put("domains_" + c + "_citations", d);
|
||||
c++;
|
||||
}
|
||||
prop.put("domains", c);
|
||||
prop.put("maxref", plasmaWebStructure.maxref);
|
||||
if (latest) sb.webStructure.joinOldNew();
|
||||
|
||||
// return rewrite properties
|
||||
return prop;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -221,17 +221,23 @@ public class plasmaWebStructure {
|
|||
return s.toString();
|
||||
}
|
||||
|
||||
public Map<String, Integer> references(final String domhash) {
|
||||
public structureEntry references(final String domhash) {
|
||||
// returns a map with a domhash(String):refcount(Integer) relation
|
||||
assert domhash.length() == 6;
|
||||
SortedMap<String, String> tailMap;
|
||||
Map<String, Integer> h = new HashMap<String, Integer>();
|
||||
String domain = "";
|
||||
String date = "";
|
||||
String ref;
|
||||
synchronized (structure_old) {
|
||||
tailMap = structure_old.tailMap(domhash);
|
||||
if (!tailMap.isEmpty()) {
|
||||
final String key = tailMap.firstKey();
|
||||
if (key.startsWith(domhash)) {
|
||||
h = refstr2map(tailMap.get(key));
|
||||
domain = key.substring(7);
|
||||
ref = tailMap.get(key);
|
||||
date = ref.substring(0, 8);
|
||||
h = refstr2map(ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -240,11 +246,16 @@ public class plasmaWebStructure {
|
|||
if (!tailMap.isEmpty()) {
|
||||
final String key = tailMap.firstKey();
|
||||
if (key.startsWith(domhash)) {
|
||||
h.putAll(refstr2map(tailMap.get(key)));
|
||||
ref = tailMap.get(key);
|
||||
if (domain.length() == 0) domain = key.substring(7);
|
||||
if (date.length() == 0) date = ref.substring(0, 8);
|
||||
assert domain == key.substring(7) : "domain = " + domain + ", key = " + key;
|
||||
h.putAll(refstr2map(ref));
|
||||
}
|
||||
}
|
||||
}
|
||||
return h;
|
||||
if (h.size() == 0) return null;
|
||||
return new structureEntry(domhash, domain, date, h);
|
||||
}
|
||||
|
||||
public int referencesCount(final String domhash) {
|
||||
|
@ -302,7 +313,8 @@ public class plasmaWebStructure {
|
|||
final String domhash = url.hash().substring(6);
|
||||
|
||||
// parse the new reference string and join it with the stored references
|
||||
final Map<String, Integer> refs = references(domhash);
|
||||
structureEntry structure = references(domhash);
|
||||
final Map<String, Integer> refs = (structure == null) ? new HashMap<String, Integer>() : structure.references;
|
||||
assert reference.length() % 12 == 0;
|
||||
String dom;
|
||||
int c;
|
||||
|
|
Loading…
Reference in New Issue
Block a user