diff --git a/contrib/pom.xml b/contrib/pom.xml
index 2667cbcda..18586aaaf 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -78,6 +78,11 @@
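<artifactId>gson</artifactId>
<version>2.8.6</version>
+ <dependency>
+ <groupId>org.xerial</groupId>
+ <artifactId>sqlite-jdbc</artifactId>
+ <version>3.34.0</version>
+ </dependency>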
diff --git a/contrib/src/main/java/org/archive/modules/postprocessor/TroughCrawlLogFeed.java b/contrib/src/main/java/org/archive/modules/postprocessor/TroughCrawlLogFeed.java
index a2e3e1bf2..91001dd45 100644
--- a/contrib/src/main/java/org/archive/modules/postprocessor/TroughCrawlLogFeed.java
+++ b/contrib/src/main/java/org/archive/modules/postprocessor/TroughCrawlLogFeed.java
@@ -18,10 +18,12 @@
*/
package org.archive.modules.postprocessor;
+
import java.net.MalformedURLException;
import java.util.ArrayList;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.Date;
-import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -89,8 +91,12 @@ public class TroughCrawlLogFeed extends Processor implements Lifecycle {
protected static final Logger logger = Logger.getLogger(TroughCrawlLogFeed.class.getName());
- protected static final int BATCH_MAX_TIME_MS = 20 * 1000;
+ protected static final int BATCH_MAX_TIME_MS = 60 * 1000; // 60000; raised from 20 * 1000 by this patch (milliseconds, going by the _MS suffix -- confirm at the use site)
protected static final int BATCH_MAX_SIZE = 400;
+ protected static final String CRAWLED_BATCH = "crawled"; // string label for the "crawled" batch; NOTE(review): its use is outside this hunk
+ protected static final String UNCRAWLED_BATCH = "uncrawled"; // counterpart string label for the "uncrawled" batch; use is outside this hunk
+ protected AtomicInteger crawledBatchSize = new AtomicInteger(0); // atomic counter, starts at 0; presumably tracks the size of the crawled batch -- confirm where it is updated
+ protected AtomicInteger uncrawledBatchSize = new AtomicInteger(0); // atomic counter, starts at 0; presumably tracks the size of the uncrawled batch -- confirm where it is updated
protected KeyedProperties kp = new KeyedProperties();
public KeyedProperties getKeyedProperties() {
@@ -119,15 +125,15 @@ public String getRethinkUrl() {
protected TroughClient troughClient() throws MalformedURLException {
if (troughClient == null) {
- troughClient = new TroughClient(getRethinkUrl(), 60 * 60);
+ troughClient = new TroughClient(getRethinkUrl(), null); // client is built and started only while the field is still null, then the same instance is returned on later calls; NOTE(review): second constructor argument changes from 60 * 60 to null -- its meaning is not visible in this patch, confirm TroughClient accepts null here
troughClient.start();
}
return troughClient;
}
- protected List