Fixed raw IPV6 addresses snapshots read/write on FAT32 and NTFS fs

Fixes issue #225
This commit is contained in:
luccioman 2018-09-12 17:34:40 +02:00
parent 10548229af
commit 7adbd1f87d
2 changed files with 77 additions and 15 deletions

View File

@ -25,6 +25,9 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@ -194,7 +197,7 @@ public class Snapshots {
/** /**
* list the snapshots for a given host name * list the snapshots for a given host name
* @param hostport the <host>.<port> identifier for the domain * @param hostport the <host>.<port> identifier for the domain (with the same format as applied by the Snapshots.pathToHostPortDir() function)
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth * @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
* @return a map with a set for each depth in the domain of the host name * @return a map with a set for each depth in the domain of the host name
*/ */
@ -244,8 +247,7 @@ public class Snapshots {
public File definePath(final DigestURL url, final int depth, final Date date, final String ext) { public File definePath(final DigestURL url, final int depth, final Date date, final String ext) {
String id = ASCII.String(url.hash()); String id = ASCII.String(url.hash());
String ds = GenericFormatter.SHORT_MINUTE_FORMATTER.format(date); String ds = GenericFormatter.SHORT_MINUTE_FORMATTER.format(date);
File path = new File(pathToShard(url, depth), id + "." + ds + "." + ext); return new File(pathToShard(url, depth), id + "." + ds + "." + ext);
return path;
} }
/** /**
@ -268,7 +270,7 @@ public class Snapshots {
/** /**
* Delete information about the storage of a snapshot to the Snapshot-internal index. * Delete information about the storage of a snapshot to the Snapshot-internal index.
* The actual deletion of files in the target directory must be done elsewehre, this method does not store the snapshot files. * The actual deletion of files in the target directory must be done elsewhere, this method does not store the snapshot files.
* @param url * @param url
* @param depth * @param depth
* @param date * @param date
@ -335,18 +337,30 @@ public class Snapshots {
} }
} }
if (host != null && depth == null) { if (host != null && depth == null) {
String hostport = pathToHostPortDir(host,80); String hostport = pathToHostPortDir(host, 80);
TreeMap<Integer, TreeSet<String>> depthIdsMap = this.directory.get(hostport); TreeMap<Integer, TreeSet<String>> depthIdsMap = this.directory.get(hostport);
if (depthIdsMap != null) loop: for (Map.Entry<Integer, TreeSet<String>> depthIds: depthIdsMap.entrySet()) { if(depthIdsMap == null && isIpv6AddrHost(host)) {
for (String id: depthIds.getValue()) { /* If the host is a raw IPV6 address, we check also if a snapshot was recorded with the old format (without percent-encoding) */
dateIdResult.put(id, new String[]{hostport, Integer.toString(depthIds.getKey())}); hostport = pathToHostPortDir(host, 80, false);
if (order == Order.ANY && dateIdResult.size() >= maxcount) break loop; depthIdsMap = this.directory.get(hostport);
} }
if (depthIdsMap != null) {
loop: for (Map.Entry<Integer, TreeSet<String>> depthIds: depthIdsMap.entrySet()) {
for (String id: depthIds.getValue()) {
dateIdResult.put(id, new String[]{hostport, Integer.toString(depthIds.getKey())});
if (order == Order.ANY && dateIdResult.size() >= maxcount) break loop;
}
}
} }
} }
if (host != null && depth != null) { if (host != null && depth != null) {
String hostport = pathToHostPortDir(host,80); String hostport = pathToHostPortDir(host, 80);
TreeMap<Integer, TreeSet<String>> domaindepth = this.directory.get(hostport); TreeMap<Integer, TreeSet<String>> domaindepth = this.directory.get(hostport);
if(domaindepth == null && isIpv6AddrHost(host)) {
/* If the host is a raw IPV6 address, we check also if a snapshot was recorded with the old format (without percent-encoding) */
hostport = pathToHostPortDir(host, 80, false);
domaindepth = this.directory.get(hostport);
}
if (domaindepth != null) { if (domaindepth != null) {
TreeSet<String> ids = domaindepth.get(depth); TreeSet<String> ids = domaindepth.get(depth);
if (ids != null) loop: for (String id: ids) { if (ids != null) loop: for (String id: ids) {
@ -430,6 +444,10 @@ public class Snapshots {
public Collection<File> findPaths(final DigestURL url, final int depth, final String ext) { public Collection<File> findPaths(final DigestURL url, final int depth, final String ext) {
String id = ASCII.String(url.hash()); String id = ASCII.String(url.hash());
File pathToShard = pathToShard(url, depth); File pathToShard = pathToShard(url, depth);
if(!pathToShard.exists() && isIpv6AddrHost(url.getHost())) {
/* If the host is a raw IPV6 address, we check also if a snapshot was recorded with the old format (without percent-encoding) */
pathToShard = pathToShard(pathToHostPortDir(url.getHost(), url.getPort(), false), ASCII.String(url.hash()), depth);
}
String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist
ArrayList<File> paths = new ArrayList<>(); ArrayList<File> paths = new ArrayList<>();
if (list != null) { if (list != null) {
@ -450,9 +468,41 @@ public class Snapshots {
File pathToShard = new File(pathToDepthDir, pathToShard(urlhash)); File pathToShard = new File(pathToDepthDir, pathToShard(urlhash));
return pathToShard; return pathToShard;
} }
/**
* @param host a domain name or IP address
* @return true when the host string is a raw IPV6 address (with square brackets)
*/
private boolean isIpv6AddrHost(final String host) {
    if (host == null) {
        /* No host at all : can not be an IPV6 address */
        return false;
    }
    /* A raw IPV6 host is expected enclosed in square brackets... */
    final boolean bracketed = host.startsWith("[") && host.endsWith("]");
    /* ...and must contain at least one colon separator */
    return bracketed && host.contains(":");
}
/**
* @param host a domain name or IP address
* @param port a port number
* @return a representation of the host and port encoding IPV6 addresses for better support across file systems (notably FAT or NTFS)
*/
private String pathToHostPortDir(final String host, final int port) { private String pathToHostPortDir(final String host, final int port) {
return host + "." + port; return pathToHostPortDir(host, port, true);
}
/**
* @param host a domain name or IP address
* @param port a port number
* @param encodeIpv6 when true, encode the host for better support across file systems (notably FAT or NTFS)
* @return a representation of the host and port
*/
private String pathToHostPortDir(final String host, final int port, final boolean encodeIpv6) {
String encodedHost = host;
if(encodeIpv6 && isIpv6AddrHost(host)) {
/* Percent-encode the host name when it is an IPV6 address, as the ':' character is illegal in a file name on MS Windows FAT32 and NTFS file systems */
try {
encodedHost = URLEncoder.encode(host, StandardCharsets.UTF_8.name());
} catch (final UnsupportedEncodingException e) {
/* This should not happen has UTF-8 encoding support is required for any JVM implementation */
}
}
return encodedHost + "." + port;
} }
private String pathToDepthDir(final int depth) { private String pathToDepthDir(final int depth) {

View File

@ -35,6 +35,7 @@ import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
@ -84,6 +85,15 @@ public class Transactions {
archiveDir = new File(transactionDir, State.ARCHIVE.dirname); archiveDir = new File(transactionDir, State.ARCHIVE.dirname);
archive = new Snapshots(archiveDir); archive = new Snapshots(archiveDir);
} }
/**
 * Shut down the shared executor and wait up to 10 seconds for its termination.
 * NOTE(review): despite the method name, no snapshot migration step is visible in this body - confirm this is intended.
 * When the calling thread is interrupted while waiting, the wait is aborted and the interrupt status of the thread is restored.
 */
public static synchronized void migrateIPV6Snapshots() {
    /* The executor field is declared elsewhere in this class (presumably an ExecutorService - to be confirmed) */
    executor.shutdown();
    try {
        /* The boolean result (terminated or timed out) is deliberately not checked here */
        executor.awaitTermination(10, TimeUnit.SECONDS);
    } catch (final InterruptedException e) {
        /* Do not swallow the interruption : restore the interrupt status so that callers can still detect it */
        Thread.currentThread().interrupt();
    }
}
/** /**
* get the number of entries for each of the transaction states * get the number of entries for each of the transaction states
@ -118,7 +128,7 @@ public class Transactions {
/** /**
* list the snapshots for a given host name * list the snapshots for a given host name
* @param hostport the <host>.<port> identifier for the domain * @param hostport the <host>.<port> identifier for the domain (with the same format as applied by the Snapshots.pathToHostPortDir() function).
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth * @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
* @param state the wanted transaction state, State.INVENTORY, State.ARCHIVE or State.ANY * @param state the wanted transaction state, State.INVENTORY, State.ARCHIVE or State.ANY
* @return a map with a set for each depth in the domain of the host name * @return a map with a set for each depth in the domain of the host name
@ -199,7 +209,9 @@ public class Transactions {
// CLEAN UP OLD DATA (if wanted) // CLEAN UP OLD DATA (if wanted)
Collection<File> oldPaths = Transactions.findPaths(url, depth, null, Transactions.State.INVENTORY); Collection<File> oldPaths = Transactions.findPaths(url, depth, null, Transactions.State.INVENTORY);
if (replaceOld && oldPaths != null) { if (replaceOld && oldPaths != null) {
for (File oldPath: oldPaths) oldPath.delete(); for (File oldPath: oldPaths) {
oldPath.delete();
}
} }
// STORE METADATA FOR THE IMAGE // STORE METADATA FOR THE IMAGE