Mirror of https://github.com/yacy/yacy_search_server.git (synced 2024-09-19 00:01:41 +02:00)

commit 6fa439c82b, parent 1ea0bc775c

- refactoring of robots
- added option to crawler to send error-URLs to solr
- changed solr scheme slightly (no multi-value fields where no multi values are)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7693 6c8d7289-2bf4-0310-a012-ef5d649a1542

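The recurring API change in the hunks below widens the error-URL push call by one trailing int argument carrying the HTTP status code of the failed fetch; callers pass -1 where no HTTP response is involved (robots.txt denials, blacklist hits, local errors). A minimal sketch of the new call shape, with YaCy's types reduced to placeholders (this interface is illustrative, not the real class):

    import java.util.Date;

    // placeholder mirroring the widened ZURL.push(...) seen in the diff below
    interface ErrorURLSink {
        void push(Object entry,     // the failed crawl request
                  byte[] executor,  // hash of the peer that executed the crawl
                  Date workdate,
                  int workcount,
                  String anycause,  // human-readable failure reason
                  int httpcode);    // HTTP status of the failed fetch, or -1 if none applies
    }
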
@@ -385,7 +385,7 @@ public class Crawler_p {
                             sb.peers.mySeed().hash.getBytes(),
                             new Date(),
                             1,
-                            reasonString);
+                            reasonString, -1);
                     }
                 } catch (final PatternSyntaxException e) {
                     prop.put("info", "4"); // crawlfilter does not match url

@@ -105,7 +105,7 @@ public class WebStructurePicture_p {
         } else {
             // find start hash
             String hash = null;
-            try {
+            if (host != null && host.length() > 0) try {
                 hash = UTF8.String((new DigestURI("http://" + host)).hash(), 6, 6);
             } catch (final MalformedURLException e) {Log.logException(e);}
             //assert (sb.webStructure.outgoingReferences(hash) != null);

@@ -9,7 +9,7 @@ import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.kelondro.data.meta.DigestURI;
 
 import de.anomic.crawler.CrawlProfile;
-import de.anomic.crawler.RobotsEntry;
+import de.anomic.crawler.RobotsTxtEntry;
 import de.anomic.search.Switchboard;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;

@@ -106,7 +106,7 @@ public class getpageinfo_p {
             final DigestURI theURL = new DigestURI(url);
 
             // determine if crawling of the current URL is allowed
-            RobotsEntry robotsEntry;
+            RobotsTxtEntry robotsEntry;
             try {
                 robotsEntry = sb.robots.getEntry(theURL, sb.peers.myBotIDs());
             } catch (IOException e) {

@@ -162,7 +162,7 @@ public final class crawlReceipt {
                     youare.getBytes(),
                     null,
                     0,
-                    result + ":" + reason);
+                    result + ":" + reason, -1);
             //switchboard.noticeURL.remove(receivedUrlhash);
             prop.put("delay", "3600");
             return prop;

@@ -85,7 +85,8 @@ public class urls {
                             sb.peers.mySeed().hash.getBytes(),
                             new Date(),
                             0,
-                            "client=____________");
+                            "client=____________",
+                            -1);
 
                     // create RSS entry
                     prop.put("item_" + c + "_title", "");

@@ -80,8 +80,8 @@ public class CrawlQueues {
         log.logConfig("Starting Crawling Management");
         noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
         FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-        errorURL = new ZURL(queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-        delegatedURL = new ZURL(queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+        errorURL = new ZURL(sb.solrConnector, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+        delegatedURL = new ZURL(sb.solrConnector, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
     }
 
     public void relocate(final File newQueuePath) {

@@ -92,8 +92,8 @@ public class CrawlQueues {
 
         noticeURL = new NoticedURL(newQueuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
         FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-        errorURL = new ZURL(newQueuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-        delegatedURL = new ZURL(newQueuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+        errorURL = new ZURL(sb.solrConnector, newQueuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+        delegatedURL = new ZURL(sb.solrConnector, newQueuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
     }
 
     public void close() {

@@ -571,7 +571,7 @@ public class CrawlQueues {
             try {
                 // checking robots.txt for http(s) resources
                 this.request.setStatus("worker-checkingrobots", WorkflowJob.STATUS_STARTED);
-                RobotsEntry robotsEntry;
+                RobotsTxtEntry robotsEntry;
                 if ((request.url().getProtocol().equals("http") || request.url().getProtocol().equals("https")) &&
                     (robotsEntry = sb.robots.getEntry(request.url(), sb.peers.myBotIDs())) != null &&
                     robotsEntry.isDisallowed(request.url())) {

@@ -581,7 +581,7 @@ public class CrawlQueues {
                             UTF8.getBytes(sb.peers.mySeed().hash),
                             new Date(),
                             1,
-                            "denied by robots.txt");
+                            "denied by robots.txt", -1);
                     this.request.setStatus("worker-disallowed", WorkflowJob.STATUS_FINISHED);
                 } else {
                     // starting a load from the internet

@@ -617,7 +617,7 @@ public class CrawlQueues {
                                 UTF8.getBytes(sb.peers.mySeed().hash),
                                 new Date(),
                                 1,
-                                "cannot load: " + result);
+                                "cannot load: " + result, -1);
                         this.request.setStatus("worker-error", WorkflowJob.STATUS_FINISHED);
                     } else {
                         this.request.setStatus("worker-processed", WorkflowJob.STATUS_FINISHED);

@@ -629,7 +629,7 @@ public class CrawlQueues {
                         UTF8.getBytes(sb.peers.mySeed().hash),
                         new Date(),
                         1,
-                        e.getMessage() + " - in worker");
+                        e.getMessage() + " - in worker", -1);
                 Log.logException(e);
                 // Client.initConnectionManager();
                 this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED);

@@ -202,7 +202,7 @@ public final class CrawlStacker {
 
                     // if the url was rejected we store it into the error URL db
                     if (rejectReason != null) {
-                        nextQueue.errorURL.push(entry, UTF8.getBytes(peers.mySeed().hash), new Date(), 1, rejectReason);
+                        nextQueue.errorURL.push(entry, UTF8.getBytes(peers.mySeed().hash), new Date(), 1, rejectReason, -1);
                     }
                 } catch (final Exception e) {
                     CrawlStacker.this.log.logWarning("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);

@@ -469,9 +469,9 @@ public final class CrawlStacker {
         }
 
         // deny cgi
-        if (url.isIndividual()) {
+        if (url.isIndividual() && !(profile.crawlingQ())) { // TODO: make special property for crawlingIndividual
             if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' is CGI URL.");
-            return "cgi url not allowed";
+            return "individual url (sessionid etc) not wanted";
         }
 
         // deny post properties

@@ -186,7 +186,7 @@ public class Latency {
         // find the delay as given by robots.txt on target site
         long robotsDelay = 0;
         if (!local) {
-            RobotsEntry robotsEntry;
+            RobotsTxtEntry robotsEntry;
             try {
                 robotsEntry = Switchboard.getSwitchboard().robots.getEntry(url, thisAgents);
             } catch (IOException e) {

@@ -239,7 +239,7 @@ public class Latency {
         // find the delay as given by robots.txt on target site
         long robotsDelay = 0;
         if (!local) {
-            RobotsEntry robotsEntry;
+            RobotsTxtEntry robotsEntry;
             try {
                 robotsEntry = Switchboard.getSwitchboard().robots.getEntry(url, thisAgents);
             } catch (IOException e) {

@@ -11,16 +11,16 @@
 //Revision: $LastChangedRevision$
 //
 //This program is free software; you can redistribute it and/or modify
-//it under the terms of the GNU General Public License as published by
+//it under the terms of the GNU General public License as published by
 //the Free Software Foundation; either version 2 of the License, or
 //(at your option) any later version.
 //
 //This program is distributed in the hope that it will be useful,
 //but WITHOUT ANY WARRANTY; without even the implied warranty of
 //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-//GNU General Public License for more details.
+//GNU General public License for more details.
 //
-//You should have received a copy of the GNU General Public License
+//You should have received a copy of the GNU General public License
 //along with this program; if not, write to the Free Software
 //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 

@@ -51,15 +51,15 @@ public class RobotsTxt {
 
     private static Logger log = Logger.getLogger(RobotsTxt.class);
 
-    public static final String ROBOTS_DB_PATH_SEPARATOR = ";";
-    public static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR);
+    protected static final String ROBOTS_DB_PATH_SEPARATOR = ";";
+    protected static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR);
 
     BEncodedHeap robotsTable;
     private final ConcurrentHashMap<String, DomSync> syncObjects;
     //private static final HashSet<String> loadedRobots = new HashSet<String>(); // only for debugging
 
     private static class DomSync {
-        public DomSync() {}
+        private DomSync() {}
     }
 
     public RobotsTxt(final BEncodedHeap robotsTable) {

@@ -78,16 +78,16 @@ public class RobotsTxt {
         return this.robotsTable.size();
     }
 
-    public RobotsEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents) throws IOException {
+    public RobotsTxtEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents) throws IOException {
         if (theURL == null) throw new IllegalArgumentException();
         if (!theURL.getProtocol().startsWith("http")) return null;
         return getEntry(theURL, thisAgents, true);
     }
 
-    private RobotsEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents, final boolean fetchOnlineIfNotAvailableOrNotFresh) throws IOException {
+    private RobotsTxtEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents, final boolean fetchOnlineIfNotAvailableOrNotFresh) throws IOException {
         // this method will always return a non-null value
         String urlHostPort = getHostPort(theURL);
-        RobotsEntry robotsTxt4Host = null;
+        RobotsTxtEntry robotsTxt4Host = null;
         Map<String, byte[]> record;
         try {
             record = this.robotsTable.get(this.robotsTable.encodedKey(urlHostPort));

@@ -95,7 +95,7 @@ public class RobotsTxt {
             log.warn("memory exhausted", e);
             record = null;
         }
-        if (record != null) robotsTxt4Host = new RobotsEntry(urlHostPort, record);
+        if (record != null) robotsTxt4Host = new RobotsTxtEntry(urlHostPort, record);
 
         if (fetchOnlineIfNotAvailableOrNotFresh && (
             robotsTxt4Host == null ||

@@ -123,7 +123,7 @@ public class RobotsTxt {
                     log.warn("memory exhausted", e);
                     record = null;
                 }
-                if (record != null) robotsTxt4Host = new RobotsEntry(urlHostPort, record);
+                if (record != null) robotsTxt4Host = new RobotsTxtEntry(urlHostPort, record);
                 if (robotsTxt4Host != null &&
                     robotsTxt4Host.getLoadedDate() != null &&
                     System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() <= 1*24*60*60*1000) {

@@ -160,7 +160,7 @@ public class RobotsTxt {
                     // no robots.txt available, make an entry to prevent that the robots loading is done twice
                     if (robotsTxt4Host == null) {
                         // generate artificial entry
-                        robotsTxt4Host = new RobotsEntry(
+                        robotsTxt4Host = new RobotsTxtEntry(
                                 robotsURL,
                                 new ArrayList<String>(),
                                 new ArrayList<String>(),

@@ -183,7 +183,7 @@ public class RobotsTxt {
                         addEntry(robotsTxt4Host);
                     }
                 } else {
-                    final robotsParser parserResult = new robotsParser((byte[]) result[DOWNLOAD_ROBOTS_TXT], thisAgents);
+                    final RobotsTxtParser parserResult = new RobotsTxtParser((byte[]) result[DOWNLOAD_ROBOTS_TXT], thisAgents);
                     ArrayList<String> denyPath = parserResult.denyList();
                     if (((Boolean) result[DOWNLOAD_ACCESS_RESTRICTED]).booleanValue()) {
                         denyPath = new ArrayList<String>();

@@ -208,7 +208,7 @@ public class RobotsTxt {
         return robotsTxt4Host;
     }
 
-    private RobotsEntry addEntry(
+    private RobotsTxtEntry addEntry(
             final MultiProtocolURI theURL,
             final ArrayList<String> allowPathList,
             final ArrayList<String> denyPathList,

@@ -219,7 +219,7 @@ public class RobotsTxt {
             final long crawlDelayMillis,
             final String agentName
             ) {
-        final RobotsEntry entry = new RobotsEntry(
+        final RobotsTxtEntry entry = new RobotsTxtEntry(
                 theURL, allowPathList, denyPathList,
                 loadedDate, modDate,
                 eTag, sitemap, crawlDelayMillis, agentName);

@@ -227,7 +227,7 @@ public class RobotsTxt {
         return entry;
     }
 
-    private String addEntry(final RobotsEntry entry) {
+    private String addEntry(final RobotsTxtEntry entry) {
         // writes a new page and returns key
         try {
             this.robotsTable.insert(this.robotsTable.encodedKey(entry.getHostName()), entry.getMem());

@@ -240,10 +240,10 @@ public class RobotsTxt {
 
     // methods that had been in robotsParser.java:
 
-    public static final int DOWNLOAD_ACCESS_RESTRICTED = 0;
-    public static final int DOWNLOAD_ROBOTS_TXT = 1;
-    public static final int DOWNLOAD_ETAG = 2;
-    public static final int DOWNLOAD_MODDATE = 3;
+    private static final int DOWNLOAD_ACCESS_RESTRICTED = 0;
+    private static final int DOWNLOAD_ROBOTS_TXT = 1;
+    private static final int DOWNLOAD_ETAG = 2;
+    private static final int DOWNLOAD_MODDATE = 3;
 
     static final String getHostPort(final MultiProtocolURI theURL) {
         String urlHostPort = null;

@@ -267,7 +267,7 @@ public class RobotsTxt {
         return port;
     }
 
-    private static Object[] downloadRobotsTxt(final MultiProtocolURI robotsURL, int redirectionCount, final RobotsEntry entry) throws Exception {
+    private static Object[] downloadRobotsTxt(final MultiProtocolURI robotsURL, int redirectionCount, final RobotsTxtEntry entry) throws Exception {
         if (robotsURL == null || !robotsURL.getProtocol().startsWith("http")) return null;
 
         if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null};

@@ -13,16 +13,16 @@
 //Revision: $LastChangedRevision$
 //
 //This program is free software; you can redistribute it and/or modify
-//it under the terms of the GNU General Public License as published by
+//it under the terms of the GNU General public License as published by
 //the Free Software Foundation; either version 2 of the License, or
 //(at your option) any later version.
 //
 //This program is distributed in the hope that it will be useful,
 //but WITHOUT ANY WARRANTY; without even the implied warranty of
 //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-//GNU General Public License for more details.
+//GNU General public License for more details.
 //
-//You should have received a copy of the GNU General Public License
+//You should have received a copy of the GNU General public License
 //along with this program; if not, write to the Free Software
 //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 

@@ -41,25 +41,25 @@ import net.yacy.cora.document.UTF8;
 import net.yacy.kelondro.util.ByteArray;
 
 
-public class RobotsEntry {
+public class RobotsTxtEntry {
 
-    public static final String HOST_NAME = "hostname";
-    public static final String ALLOW_PATH_LIST = "allow";
-    public static final String DISALLOW_PATH_LIST = "disallow";
-    public static final String LOADED_DATE = "date";
-    public static final String MOD_DATE = "modDate";
-    public static final String ETAG = "etag";
-    public static final String SITEMAP = "sitemap";
-    public static final String CRAWL_DELAY = "crawlDelay";
-    public static final String CRAWL_DELAY_MILLIS = "crawlDelayMillis";
-    public static final String AGENT_NAME = "agentname";
+    private static final String HOST_NAME = "hostname";
+    private static final String ALLOW_PATH_LIST = "allow";
+    private static final String DISALLOW_PATH_LIST = "disallow";
+    private static final String LOADED_DATE = "date";
+    private static final String MOD_DATE = "modDate";
+    private static final String ETAG = "etag";
+    private static final String SITEMAP = "sitemap";
+    private static final String CRAWL_DELAY = "crawlDelay";
+    private static final String CRAWL_DELAY_MILLIS = "crawlDelayMillis";
+    private static final String AGENT_NAME = "agentname";
 
     // this is a simple record structure that holds all properties of a single crawl start
     private final Map<String, byte[]> mem;
     private final List<String> allowPathList, denyPathList;
     private final String hostName, agentName;
 
-    public RobotsEntry(final String hostName, final Map<String, byte[]> mem) {
+    protected RobotsTxtEntry(final String hostName, final Map<String, byte[]> mem) {
         this.hostName = hostName.toLowerCase();
         this.mem = mem;
 

@@ -90,7 +90,7 @@ public class RobotsEntry {
         this.agentName = this.mem.containsKey(AGENT_NAME) ? UTF8.String(this.mem.get(AGENT_NAME)) : null;
     }
 
-    public RobotsEntry(
+    protected RobotsTxtEntry(
             final MultiProtocolURI theURL,
             final List<String> allowPathList,
             final List<String> disallowPathList,

@@ -140,15 +140,15 @@ public class RobotsEntry {
         }
     }
 
-    public String getHostName() {
+    protected String getHostName() {
         return this.hostName;
     }
 
-    public String getAgentName() {
+    protected String getAgentName() {
         return this.agentName;
     }
 
-    public Map<String, byte[]> getMem() {
+    protected Map<String, byte[]> getMem() {
         if (!this.mem.containsKey(HOST_NAME)) this.mem.put(HOST_NAME, UTF8.getBytes(this.hostName));
         return this.mem;
     }

@@ -175,34 +175,34 @@ public class RobotsEntry {
         }
     }
 
-    public Date getLoadedDate() {
+    protected Date getLoadedDate() {
         if (this.mem.containsKey(LOADED_DATE)) {
             return new Date(ByteArray.parseDecimal(this.mem.get(LOADED_DATE)));
         }
         return null;
     }
 
-    public void setLoadedDate(final Date newLoadedDate) {
+    protected void setLoadedDate(final Date newLoadedDate) {
         if (newLoadedDate != null) {
             this.mem.put(LOADED_DATE, UTF8.getBytes(Long.toString(newLoadedDate.getTime())));
         }
     }
 
-    public Date getModDate() {
+    protected Date getModDate() {
         if (this.mem.containsKey(MOD_DATE)) {
             return new Date(ByteArray.parseDecimal(this.mem.get(MOD_DATE)));
         }
         return null;
     }
 
-    public String getETag() {
+    protected String getETag() {
         if (this.mem.containsKey(ETAG)) {
             return UTF8.String(this.mem.get(ETAG));
         }
         return null;
     }
 
-    public long getCrawlDelayMillis() {
+    protected long getCrawlDelayMillis() {
         if (this.mem.containsKey(CRAWL_DELAY_MILLIS)) try {
             return ByteArray.parseDecimal(this.mem.get(CRAWL_DELAY_MILLIS));
         } catch (final NumberFormatException e) {

@@ -10,16 +10,16 @@
 Revision: $LastChangedRevision$
 
 This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
+it under the terms of the GNU General public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+GNU General public License for more details.
 
-You should have received a copy of the GNU General Public License
+You should have received a copy of the GNU General private License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 

@@ -59,16 +59,16 @@ import java.util.regex.Pattern;
  * See: http://www.kollar.com/robots.html
  */
 
-public final class robotsParser {
+public final class RobotsTxtParser {
 
     private static final Pattern patternTab = Pattern.compile("\t");
 
-    public static final String ROBOTS_USER_AGENT = "User-agent:".toUpperCase();
-    public static final String ROBOTS_DISALLOW = "Disallow:".toUpperCase();
-    public static final String ROBOTS_ALLOW = "Allow:".toUpperCase();
-    public static final String ROBOTS_COMMENT = "#";
-    public static final String ROBOTS_SITEMAP = "Sitemap:".toUpperCase();
-    public static final String ROBOTS_CRAWL_DELAY = "Crawl-delay:".toUpperCase();
+    private static final String ROBOTS_USER_AGENT = "User-agent:".toUpperCase();
+    private static final String ROBOTS_DISALLOW = "Disallow:".toUpperCase();
+    private static final String ROBOTS_ALLOW = "Allow:".toUpperCase();
+    private static final String ROBOTS_COMMENT = "#";
+    private static final String ROBOTS_SITEMAP = "Sitemap:".toUpperCase();
+    private static final String ROBOTS_CRAWL_DELAY = "Crawl-delay:".toUpperCase();
 
     private final ArrayList<String> allowList;
     private final ArrayList<String> denyList;

@@ -77,7 +77,7 @@ public final class robotsParser {
     private final Set<String> myNames; // a list of own name lists
     private String agentName; // the name of the agent that was used to return the result
 
-    public robotsParser(final byte[] robotsTxt, final Set<String> myNames) {
+    protected RobotsTxtParser(final byte[] robotsTxt, final Set<String> myNames) {
         this.allowList = new ArrayList<String>(0);
         this.denyList = new ArrayList<String>(0);
         this.sitemap = "";

@@ -91,16 +91,6 @@ public final class robotsParser {
         }
     }
 
-    public robotsParser(final BufferedReader reader, final Set<String> myNames) {
-        this.allowList = new ArrayList<String>(0);
-        this.denyList = new ArrayList<String>(0);
-        this.sitemap = "";
-        this.crawlDelayMillis = 0;
-        this.myNames = myNames;
-        this.agentName = null;
-        if (reader != null) parse(reader);
-    }
-
     private void parse(final BufferedReader reader) {
         final ArrayList<String> deny4AllAgents = new ArrayList<String>();
         final ArrayList<String> deny4ThisAgents = new ArrayList<String>();

@@ -260,7 +250,7 @@ public final class robotsParser {
      * does not make any no-DOS-forced crawl pause.
      * @return the crawl delay between two crawl access times in milliseconds
      */
-    public long crawlDelayMillis() {
+    protected long crawlDelayMillis() {
         return this.crawlDelayMillis;
     }
 

@@ -271,19 +261,19 @@ public final class robotsParser {
      * Effects: see also comment to crawlDelayMillis()
      * @return the name of the user agent that was used for the result properties or null if no user agent name was used to identify the agent
      */
-    public String agentName() {
+    protected String agentName() {
         return this.agentName;
     }
 
-    public String sitemap() {
+    protected String sitemap() {
         return this.sitemap;
     }
 
-    public ArrayList<String> allowList() {
+    protected ArrayList<String> allowList() {
         return this.allowList;
     }
 
-    public ArrayList<String> denyList() {
+    protected ArrayList<String> denyList() {
         return this.denyList;
     }
 }

@@ -34,6 +34,7 @@ import java.util.Iterator;
 import java.util.concurrent.ConcurrentLinkedQueue;
 
 import net.yacy.cora.document.UTF8;
+import net.yacy.cora.services.federated.solr.SolrSingleConnector;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.index.Index;

@@ -66,13 +67,16 @@ public class ZURL implements Iterable<ZURL.Entry> {
     // the class object
     private Index urlIndex;
     private final ConcurrentLinkedQueue<byte[]> stack;
+    private final SolrSingleConnector solrConnector;
 
     public ZURL(
+            final SolrSingleConnector solrConnector,
             final File cachePath,
             final String tablename,
             final boolean startWithEmptyFile,
             final boolean useTailCache,
             final boolean exceed134217727) {
+        this.solrConnector = solrConnector;
         // creates a new ZURL in a file
         cachePath.mkdirs();
         final File f = new File(cachePath, tablename);

@@ -94,7 +98,8 @@ public class ZURL implements Iterable<ZURL.Entry> {
         this.stack = new ConcurrentLinkedQueue<byte[]>();
     }
 
-    public ZURL() {
+    public ZURL(final SolrSingleConnector solrConnector) {
+        this.solrConnector = solrConnector;
         // creates a new ZUR in RAM
         this.urlIndex = new RowSet(rowdef);
         this.stack = new ConcurrentLinkedQueue<byte[]>();

@@ -126,14 +131,24 @@ public class ZURL implements Iterable<ZURL.Entry> {
             final byte[] executor,
             final Date workdate,
             final int workcount,
-            String anycause) {
+            String anycause,
+            int httpcode) {
         // assert executor != null; // null == proxy !
         if (exists(bentry.url().hash())) return; // don't insert double causes
         if (anycause == null) anycause = "unknown";
-        Entry entry = new Entry(bentry, executor, workdate, workcount, anycause);
+        String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
+        Entry entry = new Entry(bentry, executor, workdate, workcount, reason);
         put(entry);
         stack.add(entry.hash());
-        Log.logInfo("Rejected URL", bentry.url().toNormalform(false, false) + " - " + anycause);
+        Log.logInfo("Rejected URL", bentry.url().toNormalform(false, false) + " - " + reason);
+        if (this.solrConnector != null) {
+            // send the error to solr
+            try {
+                this.solrConnector.err(bentry.url(), reason, httpcode);
+            } catch (IOException e) {
+                Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
+            }
+        }
         while (stack.size() > maxStackSize) stack.poll();
     }
 

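The push hunk above composes the stored failure reason from the cause string plus the HTTP code when one is known. A self-contained sketch of just that composition logic (demo class, not YaCy code):

    // ReasonDemo.java - mirrors the reason-building line from the ZURL hunk above
    public class ReasonDemo {
        static String reason(String anycause, int httpcode) {
            if (anycause == null) anycause = "unknown";
            return anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
        }
        public static void main(String[] args) {
            System.out.println(reason("cannot load: connection refused", 503));
            // -> cannot load: connection refused (http return code = 503)
            System.out.println(reason("denied by robots.txt", -1));
            // -> denied by robots.txt
        }
    }
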
@@ -152,7 +152,7 @@ public class FTPLoader {
         if (berr.size() > 0 || response == null) {
             // some error logging
             final String detail = (berr.size() > 0) ? "Errorlog: " + berr.toString() : "";
-            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, " ftp server download, " + detail);
+            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, " ftp server download, " + detail, -1);
             throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail);
         }
 

@@ -78,7 +78,7 @@ public final class HTTPLoader {
     private Response load(final Request request, final int retryCount, final long maxFileSize, final boolean checkBlacklist) throws IOException {
 
         if (retryCount < 0) {
-            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection counter exceeded");
+            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection counter exceeded", -1);
             throw new IOException("Redirection counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
         }
 

@@ -94,7 +94,7 @@ public final class HTTPLoader {
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
-            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "url in blacklist");
+            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "url in blacklist", -1);
             throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 

@@ -138,7 +138,7 @@ public final class HTTPLoader {
             redirectionUrlString = redirectionUrlString.trim();
 
             if (redirectionUrlString.length() == 0) {
-                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection header empy");
+                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection header empy", code);
                 throw new IOException("CRAWLER Redirection of URL=" + request.url().toString() + " aborted. Location header is empty.");
             }
 

@@ -151,14 +151,14 @@ public final class HTTPLoader {
 
             // if we are already doing a shutdown we don't need to retry crawling
             if (Thread.currentThread().isInterrupted()) {
-                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "server shutdown");
+                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "server shutdown", code);
                 throw new IOException("CRAWLER Retry of URL=" + request.url().toString() + " aborted because of server shutdown.");
             }
 
             // check if the url was already indexed
             final String dbname = sb.urlExists(Segments.Process.LOCALCRAWLING, redirectionUrl.hash());
             if (dbname != null) {
-                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection to double content");
+                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection to double content", code);
                 throw new IOException("CRAWLER Redirection of URL=" + request.url().toString() + " ignored. The url appears already in db " + dbname);
             }
 

@@ -167,12 +167,12 @@ public final class HTTPLoader {
                 return load(request, retryCount - 1, maxFileSize, checkBlacklist);
             } else {
                 // no redirection url provided
-                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "no redirection url provided");
+                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "no redirection url provided", code);
                 throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
             }
         } else if (responseBody == null) {
             // no response, reject file
-            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "no response body (code = " + code + ")");
+            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "no response body", code);
             throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
         } else if (code == 200 || code == 203) {
             // the transfer is ok

@@ -183,7 +183,7 @@ public final class HTTPLoader {
 
             // check length again in case it was not possible to get the length before loading
             if (maxFileSize > 0 && contentLength > maxFileSize) {
-                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "file size limit exceeded");
+                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "file size limit exceeded", code);
                 throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
             }
 

@@ -201,7 +201,7 @@ public final class HTTPLoader {
             return response;
         } else {
             // if the response has not the right response type then reject file
-            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code " + code);
+            sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code", code);
             throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
         }
     }

@@ -133,7 +133,8 @@ public class YMarkEntry extends TreeMap<String, String> {
                 case DATE_MODIFIED:
                 case DATE_VISITED:
                     this.put(b.key(), String.valueOf(System.currentTimeMillis()));
-                default:
+                    break;
+                default:
                     break;
             }
         }

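The YMarkEntry hunk above is a switch-fallthrough fix: the DATE_* cases previously fell through into default, which was harmless only because default did nothing but break; the explicit break documents intent and guards against future default logic. A standalone illustration of the hazard (demo code, not YaCy's):

    public class FallthroughDemo {
        enum Key { DATE_MODIFIED, DATE_VISITED, OTHER }

        static String handle(Key k) {
            String result = "unset";
            switch (k) {
                case DATE_MODIFIED:
                case DATE_VISITED:
                    result = "timestamp";
                    break; // without this, execution falls through into default
                default:
                    result = "other"; // on fallthrough this would overwrite "timestamp"
            }
            return result;
        }

        public static void main(String[] args) {
            System.out.println(handle(Key.DATE_VISITED)); // timestamp
            System.out.println(handle(Key.OTHER));        // other
        }
    }
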
@@ -112,7 +112,7 @@ public class YMarkTables {
         this.deleteBookmark(bmk_user, YMarkUtil.getBookmarkId(url));
     }
 
-    public TreeMap<String, YMarkTag> getTags(final Iterator<Row> rowIterator) throws IOException {
+    public TreeMap<String, YMarkTag> getTags(final Iterator<Row> rowIterator) {
         final TreeMap<String,YMarkTag> tags = new TreeMap<String,YMarkTag>();
         Tables.Row bmk_row = null;
         Iterator<String> tit = null;

@@ -305,7 +305,7 @@ public final class HTTPDFileHandler {
         final boolean accountEmpty = adminAccountBase64MD5.length() == 0;
         final boolean softauth = accessFromLocalhost && authorization != null && authorization.length() > 6 && (adminAccountBase64MD5.equals(authorization.substring(6)));
 
-        if (protectedPage && ((!softauth && !grantedForLocalhost && !accountEmpty) || requestHeader.userAgent().startsWith("yacybot"))) {
+        if (protectedPage && !softauth && ((!grantedForLocalhost && !accountEmpty) || requestHeader.userAgent().startsWith("yacybot"))) {
             // authentication required
             if (authorization == null) {
                 // no authorization given in response. Ask for that

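The HTTPDFileHandler hunk above regroups the boolean condition so that a valid softauth (admin credentials presented from localhost) now also bypasses the yacybot user-agent check, which the old grouping did not. A standalone sketch of the two predicates, with variable names taken from the diff:

    public class AuthPredicateDemo {
        static boolean oldRule(boolean softauth, boolean grantedForLocalhost, boolean accountEmpty, boolean isYacybot) {
            return (!softauth && !grantedForLocalhost && !accountEmpty) || isYacybot;
        }
        static boolean newRule(boolean softauth, boolean grantedForLocalhost, boolean accountEmpty, boolean isYacybot) {
            return !softauth && ((!grantedForLocalhost && !accountEmpty) || isYacybot);
        }
        public static void main(String[] args) {
            // softauth=true together with a yacybot user agent:
            // the old rule still demanded authentication, the new one does not
            System.out.println(oldRule(true, false, false, true)); // true
            System.out.println(newRule(true, false, false, true)); // false
        }
    }
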
@@ -523,6 +523,11 @@ public final class Switchboard extends serverSwitch {
         log.logConfig("Parser: Initializing Mime Type deny list");
         TextParser.setDenyMime(getConfig(SwitchboardConstants.PARSER_MIME_DENY, ""));
 
+        // set up the solr interface
+        String solrurl = this.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
+        boolean usesolr = this.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurl.length() > 0;
+        this.solrConnector = (usesolr) ? new SolrSingleConnector(solrurl, SolrScheme.SolrCell) : null;
+
         // start a loader
         log.logConfig("Starting Crawl Loader");
         this.loader = new LoaderDispatcher(this);

@@ -605,11 +610,6 @@ public final class Switchboard extends serverSwitch {
             }
         }
 
-        // set up the solr interface
-        String solrurl = this.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
-        boolean usesolr = this.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurl.length() > 0;
-        this.solrConnector = (usesolr) ? new SolrSingleConnector(solrurl, SolrScheme.SolrCell) : null;
-
         // initializing dht chunk generation
         this.dhtMaxReferenceCount = (int) getConfigLong(SwitchboardConstants.INDEX_DIST_CHUNK_SIZE_START, 50);
 

@@ -2423,7 +2423,7 @@ public final class Switchboard extends serverSwitch {
                 0,
                 0,
                 0);
-        crawlQueues.errorURL.push(bentry, initiator, new Date(), 0, failreason);
+        crawlQueues.errorURL.push(bentry, initiator, new Date(), 0, failreason, -1);
     }
 
     public final void heuristicSite(final SearchEvent searchEvent, final String host) {

@@ -59,8 +59,8 @@ public enum SolrScheme {
         solrdoc.addField("id", id);
         solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
         InetAddress address = Domains.dnsResolve(digestURI.getHost());
-        if (address != null) solrdoc.addField("attr_ip", address.getHostAddress());
-        if (digestURI.getHost() != null) solrdoc.addField("attr_host", digestURI.getHost());
+        if (address != null) solrdoc.addField("ip_s", address.getHostAddress());
+        if (digestURI.getHost() != null) solrdoc.addField("host_s", digestURI.getHost());
         solrdoc.addField("title", yacydoc.dc_title());
         solrdoc.addField("author", yacydoc.dc_creator());
         solrdoc.addField("description", yacydoc.dc_description());

@@ -68,7 +68,7 @@ public enum SolrScheme {
         solrdoc.addField("last_modified", header.lastModified());
         solrdoc.addField("keywords", yacydoc.dc_subject(' '));
         String content = UTF8.String(yacydoc.getTextBytes());
-        solrdoc.addField("attr_text", content);
+        solrdoc.addField("text_t", content);
         int contentwc = content.split(" ").length;
         solrdoc.addField("wordcount_i", contentwc);
 

@@ -111,14 +111,14 @@ public enum SolrScheme {
         solrdoc.addField("attr_outboundlinks", yacydoc.outboundLinks().toArray());
 
         // charset
-        solrdoc.addField("attr_charset", yacydoc.getCharset());
+        solrdoc.addField("charset_s", yacydoc.getCharset());
 
         // coordinates
         if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) {
             solrdoc.addField("lon_coordinate", yacydoc.lon());
             solrdoc.addField("lat_coordinate", yacydoc.lat());
         }
-        solrdoc.addField("attr_httpstatus", "200");
+        solrdoc.addField("httpstatus_i", 200);
         Object parser = yacydoc.getParserObject();
         if (parser instanceof ContentScraper) {
             ContentScraper html = (ContentScraper) parser;

|
|||
// meta tags
|
||||
Map<String, String> metas = html.getMetas();
|
||||
String robots = metas.get("robots");
|
||||
if (robots != null) solrdoc.addField("attr_meta_robots", robots);
|
||||
if (robots != null) solrdoc.addField("metarobots_t", robots);
|
||||
String generator = metas.get("generator");
|
||||
if (generator != null) solrdoc.addField("attr_meta_generator", generator);
|
||||
if (generator != null) solrdoc.addField("metagenerator_t", generator);
|
||||
|
||||
// bold, italic
|
||||
String[] bold = html.getBold();
|
||||
|
|
|
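The SolrScheme hunks above rename attr_* fields to Solr dynamic-field suffixes that declare a single-valued type (_s string, _t text, _i integer), matching the commit message's "no multi-value fields where no multi values are". Assuming a standard schema with such dynamic fields, the renamed fields become directly filterable and sortable; a hedged SolrJ sketch (field names from the diff, query values invented):

    import org.apache.solr.client.solrj.SolrQuery;

    public class ErrorQueryDemo {
        public static void main(String[] args) {
            // hypothetical query against the renamed single-valued fields
            SolrQuery query = new SolrQuery("host_s:example.org");
            query.addFilterQuery("httpstatus_i:[400 TO *]"); // e.g. failures recorded via err()
            query.setSortField("wordcount_i", SolrQuery.ORDER.desc);
            System.out.println(query.toString());
        }
    }
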
@@ -26,6 +26,7 @@ package net.yacy.cora.services.federated.solr;
 
 import java.io.File;
 import java.io.IOException;
+import java.net.InetAddress;
 import java.net.MalformedURLException;
 import java.util.ArrayList;
 import java.util.Collection;

@@ -41,8 +42,11 @@ import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;
 
+import net.yacy.cora.document.UTF8;
+import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.document.Document;
+import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 
 

@@ -189,11 +193,10 @@ public class SolrSingleConnector {
      */
 
     public void add(String id, ResponseHeader header, Document doc) throws IOException {
-        add(id, header, doc, this.scheme);
+        add(this.scheme.yacy2solr(id, header, doc));
     }
 
-    public void add(String id, ResponseHeader header, Document doc, SolrScheme tempScheme) throws IOException {
-        SolrInputDocument solrdoc = tempScheme.yacy2solr(id, header, doc);
+    private void add(SolrInputDocument solrdoc) throws IOException {
         int thisrrc = this.transmissionRoundRobinCounter;
         int nextrrc = thisrrc++;
         if (nextrrc >= transmissionQueueCount) nextrrc = 0;

@@ -223,6 +226,28 @@ public class SolrSingleConnector {
         }
     }
 
+    public void err(DigestURI digestURI, String failReason, int httpstatus) throws IOException {
+
+        SolrInputDocument solrdoc = new SolrInputDocument();
+        solrdoc.addField("id", UTF8.String(digestURI.hash()));
+        solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
+        InetAddress address = Domains.dnsResolve(digestURI.getHost());
+        if (address != null) solrdoc.addField("ip_s", address.getHostAddress());
+        if (digestURI.getHost() != null) solrdoc.addField("host_s", digestURI.getHost());
+
+        // path elements of link
+        String path = digestURI.getPath();
+        if (path != null) {
+            String[] paths = path.split("/");
+            if (paths.length > 0) solrdoc.addField("attr_paths", paths);
+        }
+
+        solrdoc.addField("failreason_t", failReason);
+        solrdoc.addField("httpstatus_i", httpstatus);
+
+        add(solrdoc);
+    }
+
     private void flushTransmissionQueue(int idx) throws IOException {
         Collection<SolrInputDocument> c = new ArrayList<SolrInputDocument>();
         while (this.transmissionQueue[idx].size() > 0) {