mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
proper deletion of loadtime index
This commit is contained in:
parent
bd3f2483a1
commit
9c38b1254e
|
@ -416,6 +416,7 @@ public class Crawler_p {
|
||||||
if ("smb.ftp".indexOf(u.getProtocol()) >= 0 || "pdf".equals(MultiProtocolURL.getFileExtension(u.getFileName()))) anysmbftporpdf = true;
|
if ("smb.ftp".indexOf(u.getProtocol()) >= 0 || "pdf".equals(MultiProtocolURL.getFileExtension(u.getFileName()))) anysmbftporpdf = true;
|
||||||
}
|
}
|
||||||
sb.index.fulltext().remove(deleteIDs);
|
sb.index.fulltext().remove(deleteIDs);
|
||||||
|
deleteIDs.forEach(urlhash -> {try {sb.index.loadTimeIndex().remove(urlhash.getBytes());} catch (IOException e) {}});
|
||||||
sb.crawlQueues.removeHosts(hosthashes);
|
sb.crawlQueues.removeHosts(hosthashes);
|
||||||
sb.index.fulltext().commit(true);
|
sb.index.fulltext().commit(true);
|
||||||
|
|
||||||
|
@ -437,6 +438,7 @@ public class Crawler_p {
|
||||||
String basepath = u.toNormalform(true);
|
String basepath = u.toNormalform(true);
|
||||||
if (!basepath.endsWith("/")) {final int p = basepath.lastIndexOf("/"); if (p > 0) basepath = basepath.substring(0, p + 1);}
|
if (!basepath.endsWith("/")) {final int p = basepath.lastIndexOf("/"); if (p > 0) basepath = basepath.substring(0, p + 1);}
|
||||||
final int count = sb.index.fulltext().remove(basepath, deleteageDate);
|
final int count = sb.index.fulltext().remove(basepath, deleteageDate);
|
||||||
|
try {sb.index.loadTimeIndex().clear();} catch (IOException e) {}
|
||||||
if (count > 0) ConcurrentLog.info("Crawler_p", "deleted " + count + " documents for host " + u.getHost());
|
if (count > 0) ConcurrentLog.info("Crawler_p", "deleted " + count + " documents for host " + u.getHost());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -143,7 +143,6 @@ public class IndexControlURLs_p {
|
||||||
if ( post.get("deleteFirstSeen", "").equals("on")) {
|
if ( post.get("deleteFirstSeen", "").equals("on")) {
|
||||||
try {
|
try {
|
||||||
segment.firstSeenIndex().clear();
|
segment.firstSeenIndex().clear();
|
||||||
segment.loadTimeIndex().clear();
|
|
||||||
} catch (final IOException e) {}
|
} catch (final IOException e) {}
|
||||||
}
|
}
|
||||||
if ( post.get("deleteCrawlQueues", "").equals("on") ) {
|
if ( post.get("deleteCrawlQueues", "").equals("on") ) {
|
||||||
|
@ -166,6 +165,7 @@ public class IndexControlURLs_p {
|
||||||
|
|
||||||
ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", ClientIdentification.yacyInternetCrawlerAgentName));
|
ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", ClientIdentification.yacyInternetCrawlerAgentName));
|
||||||
int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, agent, CacheStrategy.IFEXIST);
|
int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, agent, CacheStrategy.IFEXIST);
|
||||||
|
try {segment.loadTimeIndex().remove(urlhash.getBytes());} catch (IOException e) {}
|
||||||
prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
|
prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,6 +183,7 @@ public class IndexControlURLs_p {
|
||||||
sb.urlRemove(segment, urlhash.getBytes());
|
sb.urlRemove(segment, urlhash.getBytes());
|
||||||
prop.putHTML("result", "Removed URL " + url);
|
prop.putHTML("result", "Removed URL " + url);
|
||||||
}
|
}
|
||||||
|
segment.loadTimeIndex().remove(urlhash.getBytes());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
prop.putHTML("result", "Error when querying the url hash " + urlhash + ":" + e.getMessage());
|
prop.putHTML("result", "Error when querying the url hash " + urlhash + ":" + e.getMessage());
|
||||||
}
|
}
|
||||||
|
@ -201,6 +202,7 @@ public class IndexControlURLs_p {
|
||||||
prop.put("result", "No input given; nothing deleted.");
|
prop.put("result", "No input given; nothing deleted.");
|
||||||
} else {
|
} else {
|
||||||
sb.urlRemove(segment, urlhash.getBytes());
|
sb.urlRemove(segment, urlhash.getBytes());
|
||||||
|
try {segment.loadTimeIndex().remove(urlhash.getBytes());} catch (IOException e) {}
|
||||||
prop.putHTML("result", "Removed URL " + urlstring);
|
prop.putHTML("result", "Removed URL " + urlstring);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -267,6 +269,7 @@ public class IndexControlURLs_p {
|
||||||
Set<String> hostnames = new HashSet<String>();
|
Set<String> hostnames = new HashSet<String>();
|
||||||
hostnames.add(domain);
|
hostnames.add(domain);
|
||||||
segment.fulltext().deleteStaleDomainNames(hostnames, null);
|
segment.fulltext().deleteStaleDomainNames(hostnames, null);
|
||||||
|
try {segment.loadTimeIndex().clear();} catch (IOException e) {} // delete all to prevent that existing entries reject reloading
|
||||||
// trigger the loading of the table
|
// trigger the loading of the table
|
||||||
post.put("statistics", "");
|
post.put("statistics", "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -161,6 +161,7 @@ public class IndexDeletion_p {
|
||||||
} else {
|
} else {
|
||||||
sb.remove(ids);
|
sb.remove(ids);
|
||||||
defaultConnector.commit(false);
|
defaultConnector.commit(false);
|
||||||
|
ids.forEach(urlhash -> {try {sb.index.loadTimeIndex().remove(urlhash.getBytes());} catch (IOException e) {}});
|
||||||
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs matching with " + urldelete);
|
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs matching with " + urldelete);
|
||||||
prop.put("urldelete-active", 2);
|
prop.put("urldelete-active", 2);
|
||||||
}
|
}
|
||||||
|
@ -177,6 +178,7 @@ public class IndexDeletion_p {
|
||||||
try {
|
try {
|
||||||
defaultConnector.deleteByQuery(regexquery);
|
defaultConnector.deleteByQuery(regexquery);
|
||||||
defaultConnector.commit(false);
|
defaultConnector.commit(false);
|
||||||
|
try {sb.index.loadTimeIndex().clear();} catch (IOException e) {}
|
||||||
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, regex match = " + urldelete);
|
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, regex match = " + urldelete);
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
}
|
}
|
||||||
|
@ -206,6 +208,7 @@ public class IndexDeletion_p {
|
||||||
try {
|
try {
|
||||||
defaultConnector.deleteByQuery(collection1Query);
|
defaultConnector.deleteByQuery(collection1Query);
|
||||||
defaultConnector.commit(false);
|
defaultConnector.commit(false);
|
||||||
|
try {sb.index.loadTimeIndex().clear();} catch (IOException e) {}
|
||||||
if (webgraphConnector != null) webgraphConnector.deleteByQuery(webgraphQuery);
|
if (webgraphConnector != null) webgraphConnector.deleteByQuery(webgraphQuery);
|
||||||
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs older than " + timedelete_number + " " + timedelete_unit);
|
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs older than " + timedelete_number + " " + timedelete_unit);
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
|
@ -232,6 +235,7 @@ public class IndexDeletion_p {
|
||||||
try {
|
try {
|
||||||
defaultConnector.deleteByQuery(query);
|
defaultConnector.deleteByQuery(query);
|
||||||
defaultConnector.commit(false);
|
defaultConnector.commit(false);
|
||||||
|
try {sb.index.loadTimeIndex().clear();} catch (IOException e) {}
|
||||||
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, collection " + collectiondelete);
|
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, collection " + collectiondelete);
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
}
|
}
|
||||||
|
@ -258,6 +262,7 @@ public class IndexDeletion_p {
|
||||||
ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete + "\", size before deletion = " + connector.getSize());
|
ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete + "\", size before deletion = " + connector.getSize());
|
||||||
connector.deleteByQuery(querydelete);
|
connector.deleteByQuery(querydelete);
|
||||||
connector.commit(false);
|
connector.commit(false);
|
||||||
|
try {sb.index.loadTimeIndex().clear();} catch (IOException e) {}
|
||||||
ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete + "\", size after commit = " + connector.getSize());
|
ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete + "\", size after commit = " + connector.getSize());
|
||||||
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, solr query, q = " + querydelete);
|
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, solr query, q = " + querydelete);
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user