mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Take out mailto link collection in the internally parsed document
As earlier plans to make use of mailto as a separate webgraph entity didn't materialize (see http://forum.yacy-websuche.de/viewtopic.php?f=8&t=5726&p=32493&hilit=mailto#p32493), free the unused handling and resources.
This commit is contained in:
parent
335868edba
commit
c77e43a391
|
@ -424,23 +424,6 @@ public class ViewFile {
|
|||
boolean dark = true;
|
||||
int i = 0;
|
||||
|
||||
if (document.getEmaillinks() != null) {
|
||||
Iterator<AnchorURL> emailit = document.getEmaillinks().iterator();
|
||||
while (emailit.hasNext()) {
|
||||
AnchorURL eentry = emailit.next();
|
||||
prop.put("viewMode_links_" + i + "_nr", i);
|
||||
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
|
||||
prop.put("viewMode_links_" + i + "_type", "email");
|
||||
prop.put("viewMode_links_" + i + "_text", (eentry.getTextProperty().isEmpty()) ? " " : eentry.getTextProperty());
|
||||
prop.put("viewMode_links_" + i + "_url", "#");
|
||||
prop.put("viewMode_links_" + i + "_link", eentry.toNormalform(true));
|
||||
prop.put("viewMode_links_" + i + "_rel", "");
|
||||
prop.put("viewMode_links_" + i + "_name", eentry.getNameProperty());
|
||||
dark = !dark;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
|
||||
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
|
||||
dark = (i % 2 == 0);
|
||||
|
|
|
@ -86,7 +86,6 @@ public class Document {
|
|||
// text in image tags.
|
||||
private LinkedHashMap<AnchorURL, String> audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed
|
||||
private LinkedHashMap<DigestURL, String> inboundlinks, outboundlinks;
|
||||
private Set<AnchorURL> emaillinks; // mailto: links
|
||||
/** links to icons that belongs to the document (mapped by absolute URL) */
|
||||
private Map<DigestURL, IconEntry> icons;
|
||||
private boolean resorted;
|
||||
|
@ -141,7 +140,6 @@ public class Document {
|
|||
this.audiolinks = null;
|
||||
this.videolinks = null;
|
||||
this.applinks = null;
|
||||
this.emaillinks = null;
|
||||
this.icons = new HashMap<>();
|
||||
this.resorted = false;
|
||||
this.inboundlinks = null;
|
||||
|
@ -520,17 +518,9 @@ dc_rights
|
|||
return this.applinks;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return mailto links
|
||||
*/
|
||||
public Set<AnchorURL> getEmaillinks() {
|
||||
// this is part of the getAnchor-set: only links to email addresses
|
||||
if (!this.resorted) resortLinks();
|
||||
return this.emaillinks;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return last modification date of the source document
|
||||
* @return last modification date of the source document. (The date is initialized with last modification date or received date)
|
||||
*/
|
||||
public Date getLastModified() {
|
||||
return this.lastModified;
|
||||
|
@ -551,7 +541,7 @@ dc_rights
|
|||
if (this.resorted) return;
|
||||
synchronized (this) {
|
||||
if (this.resorted) return;
|
||||
// extract hyperlinks, medialinks and emaillinks from anchorlinks
|
||||
// extract hyperlinks, medialinks from anchorlinks
|
||||
String u;
|
||||
int extpos, qpos;
|
||||
String ext = null;
|
||||
|
@ -562,7 +552,6 @@ dc_rights
|
|||
this.videolinks = new LinkedHashMap<AnchorURL, String>();
|
||||
this.audiolinks = new LinkedHashMap<AnchorURL, String>();
|
||||
this.applinks = new LinkedHashMap<AnchorURL, String>();
|
||||
this.emaillinks = new LinkedHashSet<AnchorURL>();
|
||||
final Map<AnchorURL, ImageEntry> collectedImages = new HashMap<AnchorURL, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
|
||||
for (final Map.Entry<DigestURL, ImageEntry> entry: this.images.entrySet()) {
|
||||
if (entry.getKey() != null && entry.getKey().getHost() != null && entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image");
|
||||
|
@ -571,11 +560,6 @@ dc_rights
|
|||
if (url == null) continue;
|
||||
u = url.toNormalform(true);
|
||||
final String name = url.getNameProperty();
|
||||
// check mailto scheme first (not suppose to get into in/outboundlinks or hyperlinks -> crawler can't process)
|
||||
if (url.getProtocol().equals("mailto")) {
|
||||
this.emaillinks.add(url);
|
||||
continue;
|
||||
}
|
||||
|
||||
final boolean noindex = url.getRelProperty().toLowerCase().indexOf("noindex",0) >= 0;
|
||||
final boolean nofollow = url.getRelProperty().toLowerCase().indexOf("nofollow",0) >= 0;
|
||||
|
|
Loading…
Reference in New Issue
Block a user