mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Fixed raw IPV6 addresses snapshots read/write on FAT32 and NTFS fs
Fixes issue #225
This commit is contained in:
parent
10548229af
commit
7adbd1f87d
|
@ -25,6 +25,9 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -194,7 +197,7 @@ public class Snapshots {
|
|||
|
||||
/**
|
||||
* list the snapshots for a given host name
|
||||
* @param hostport the <host>.<port> identifier for the domain
|
||||
* @param hostport the <host>.<port> identifier for the domain (with the same format as applied by the Snapshots.pathToHostPortDir() function)
|
||||
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
|
||||
* @return a map with a set for each depth in the domain of the host name
|
||||
*/
|
||||
|
@ -244,8 +247,7 @@ public class Snapshots {
|
|||
public File definePath(final DigestURL url, final int depth, final Date date, final String ext) {
|
||||
String id = ASCII.String(url.hash());
|
||||
String ds = GenericFormatter.SHORT_MINUTE_FORMATTER.format(date);
|
||||
File path = new File(pathToShard(url, depth), id + "." + ds + "." + ext);
|
||||
return path;
|
||||
return new File(pathToShard(url, depth), id + "." + ds + "." + ext);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -268,7 +270,7 @@ public class Snapshots {
|
|||
|
||||
/**
|
||||
* Delete information about the storage of a snapshot to the Snapshot-internal index.
|
||||
* The actual deletion of files in the target directory must be done elsewehre, this method does not store the snapshot files.
|
||||
* The actual deletion of files in the target directory must be done elsewhere, this method does not store the snapshot files.
|
||||
* @param url
|
||||
* @param depth
|
||||
* @param date
|
||||
|
@ -335,18 +337,30 @@ public class Snapshots {
|
|||
}
|
||||
}
|
||||
if (host != null && depth == null) {
|
||||
String hostport = pathToHostPortDir(host,80);
|
||||
String hostport = pathToHostPortDir(host, 80);
|
||||
TreeMap<Integer, TreeSet<String>> depthIdsMap = this.directory.get(hostport);
|
||||
if (depthIdsMap != null) loop: for (Map.Entry<Integer, TreeSet<String>> depthIds: depthIdsMap.entrySet()) {
|
||||
for (String id: depthIds.getValue()) {
|
||||
dateIdResult.put(id, new String[]{hostport, Integer.toString(depthIds.getKey())});
|
||||
if (order == Order.ANY && dateIdResult.size() >= maxcount) break loop;
|
||||
}
|
||||
if(depthIdsMap == null && isIpv6AddrHost(host)) {
|
||||
/* If the host is a raw IPV6 address, we also check whether a snapshot was recorded with the old format (without percent-encoding) */
|
||||
hostport = pathToHostPortDir(host, 80, false);
|
||||
depthIdsMap = this.directory.get(hostport);
|
||||
}
|
||||
if (depthIdsMap != null) {
|
||||
loop: for (Map.Entry<Integer, TreeSet<String>> depthIds: depthIdsMap.entrySet()) {
|
||||
for (String id: depthIds.getValue()) {
|
||||
dateIdResult.put(id, new String[]{hostport, Integer.toString(depthIds.getKey())});
|
||||
if (order == Order.ANY && dateIdResult.size() >= maxcount) break loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (host != null && depth != null) {
|
||||
String hostport = pathToHostPortDir(host,80);
|
||||
String hostport = pathToHostPortDir(host, 80);
|
||||
TreeMap<Integer, TreeSet<String>> domaindepth = this.directory.get(hostport);
|
||||
if(domaindepth == null && isIpv6AddrHost(host)) {
|
||||
/* If the host is a raw IPV6 address, we also check whether a snapshot was recorded with the old format (without percent-encoding) */
|
||||
hostport = pathToHostPortDir(host, 80, false);
|
||||
domaindepth = this.directory.get(hostport);
|
||||
}
|
||||
if (domaindepth != null) {
|
||||
TreeSet<String> ids = domaindepth.get(depth);
|
||||
if (ids != null) loop: for (String id: ids) {
|
||||
|
@ -430,6 +444,10 @@ public class Snapshots {
|
|||
public Collection<File> findPaths(final DigestURL url, final int depth, final String ext) {
|
||||
String id = ASCII.String(url.hash());
|
||||
File pathToShard = pathToShard(url, depth);
|
||||
if(!pathToShard.exists() && isIpv6AddrHost(url.getHost())) {
|
||||
/* If the host is a raw IPV6 address, we also check whether a snapshot was recorded with the old format (without percent-encoding) */
|
||||
pathToShard = pathToShard(pathToHostPortDir(url.getHost(), url.getPort(), false), ASCII.String(url.hash()), depth);
|
||||
}
|
||||
String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist
|
||||
ArrayList<File> paths = new ArrayList<>();
|
||||
if (list != null) {
|
||||
|
@ -451,8 +469,40 @@ public class Snapshots {
|
|||
return pathToShard;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param host a domain name or IP address
|
||||
* @return true when the host string is a raw IPV6 address (with square brackets)
|
||||
*/
|
||||
private boolean isIpv6AddrHost(final String host) {
|
||||
return (host != null && host.startsWith("[") && host.endsWith("]") && host.contains(":"));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param host a domain name or IP address
|
||||
* @param port a port number
|
||||
* @return a representation of the host and port, with raw IPV6 addresses percent-encoded for better support across file systems (notably FAT or NTFS)
|
||||
*/
|
||||
private String pathToHostPortDir(final String host, final int port) {
|
||||
return host + "." + port;
|
||||
return pathToHostPortDir(host, port, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param host a domain name or IP address
|
||||
* @param port a port number
|
||||
* @param encodeIpv6 when true, percent-encode the host if it is a raw IPV6 address, for better support across file systems (notably FAT or NTFS)
|
||||
* @return a representation of the host and port
|
||||
*/
|
||||
private String pathToHostPortDir(final String host, final int port, final boolean encodeIpv6) {
|
||||
String encodedHost = host;
|
||||
if(encodeIpv6 && isIpv6AddrHost(host)) {
|
||||
/* Percent-encode the host name when it is an IPV6 address, as the ':' character is illegal in a file name on MS Windows FAT32 and NTFS file systems */
|
||||
try {
|
||||
encodedHost = URLEncoder.encode(host, StandardCharsets.UTF_8.name());
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
/* This should not happen has UTF-8 encoding support is required for any JVM implementation */
|
||||
}
|
||||
}
|
||||
return encodedHost + "." + port;
|
||||
}
|
||||
|
||||
private String pathToDepthDir(final int depth) {
|
||||
|
|
|
@ -35,6 +35,7 @@ import java.util.Set;
|
|||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
@ -85,6 +86,15 @@ public class Transactions {
|
|||
archive = new Snapshots(archiveDir);
|
||||
}
|
||||
|
||||
public static synchronized void migrateIPV6Snapshots() {
|
||||
executor.shutdown();
|
||||
try {
|
||||
executor.awaitTermination(10, TimeUnit.SECONDS);
|
||||
} catch (final InterruptedException e) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get the number of entries for each of the transaction states
|
||||
* @return the total number of different documents for each transaction state
|
||||
|
@ -118,7 +128,7 @@ public class Transactions {
|
|||
|
||||
/**
|
||||
* list the snapshots for a given host name
|
||||
* @param hostport the <host>.<port> identifier for the domain
|
||||
* @param hostport the <host>.<port> identifier for the domain (with the same format as applied by the Snapshots.pathToHostPortDir() function).
|
||||
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
|
||||
* @param state the wanted transaction state, State.INVENTORY, State.ARCHIVE or State.ANY
|
||||
* @return a map with a set for each depth in the domain of the host name
|
||||
|
@ -199,7 +209,9 @@ public class Transactions {
|
|||
// CLEAN UP OLD DATA (if wanted)
|
||||
Collection<File> oldPaths = Transactions.findPaths(url, depth, null, Transactions.State.INVENTORY);
|
||||
if (replaceOld && oldPaths != null) {
|
||||
for (File oldPath: oldPaths) oldPath.delete();
|
||||
for (File oldPath: oldPaths) {
|
||||
oldPath.delete();
|
||||
}
|
||||
}
|
||||
|
||||
// STORE METADATA FOR THE IMAGE
|
||||
|
|
Loading…
Reference in New Issue
Block a user