mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Fixed raw IPV6 addresses snapshots read/write on FAT32 and NTFS fs
Fixes issue #225
This commit is contained in:
parent
10548229af
commit
7adbd1f87d
|
@ -25,6 +25,9 @@ import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -194,7 +197,7 @@ public class Snapshots {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* list the snapshots for a given host name
|
* list the snapshots for a given host name
|
||||||
* @param hostport the <host>.<port> identifier for the domain
|
* @param hostport the <host>.<port> identifier for the domain (with the same format as applied by the Snapshots.pathToHostPortDir() function)
|
||||||
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
|
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
|
||||||
* @return a map with a set for each depth in the domain of the host name
|
* @return a map with a set for each depth in the domain of the host name
|
||||||
*/
|
*/
|
||||||
|
@ -244,8 +247,7 @@ public class Snapshots {
|
||||||
public File definePath(final DigestURL url, final int depth, final Date date, final String ext) {
|
public File definePath(final DigestURL url, final int depth, final Date date, final String ext) {
|
||||||
String id = ASCII.String(url.hash());
|
String id = ASCII.String(url.hash());
|
||||||
String ds = GenericFormatter.SHORT_MINUTE_FORMATTER.format(date);
|
String ds = GenericFormatter.SHORT_MINUTE_FORMATTER.format(date);
|
||||||
File path = new File(pathToShard(url, depth), id + "." + ds + "." + ext);
|
return new File(pathToShard(url, depth), id + "." + ds + "." + ext);
|
||||||
return path;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -268,7 +270,7 @@ public class Snapshots {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete information about the storage of a snapshot to the Snapshot-internal index.
|
* Delete information about the storage of a snapshot from the Snapshot-internal index.
|
||||||
* The actual deletion of files in the target directory must be done elsewehre, this method does not store the snapshot files.
|
* The actual deletion of files in the target directory must be done elsewhere, this method does not delete the snapshot files.
|
||||||
* @param url
|
* @param url
|
||||||
* @param depth
|
* @param depth
|
||||||
* @param date
|
* @param date
|
||||||
|
@ -337,16 +339,28 @@ public class Snapshots {
|
||||||
if (host != null && depth == null) {
|
if (host != null && depth == null) {
|
||||||
String hostport = pathToHostPortDir(host, 80);
|
String hostport = pathToHostPortDir(host, 80);
|
||||||
TreeMap<Integer, TreeSet<String>> depthIdsMap = this.directory.get(hostport);
|
TreeMap<Integer, TreeSet<String>> depthIdsMap = this.directory.get(hostport);
|
||||||
if (depthIdsMap != null) loop: for (Map.Entry<Integer, TreeSet<String>> depthIds: depthIdsMap.entrySet()) {
|
if(depthIdsMap == null && isIpv6AddrHost(host)) {
|
||||||
|
/* If the host is a raw IPV6 address, we check also if a snapshot was recorded with the old format (without percent-encoding) */
|
||||||
|
hostport = pathToHostPortDir(host, 80, false);
|
||||||
|
depthIdsMap = this.directory.get(hostport);
|
||||||
|
}
|
||||||
|
if (depthIdsMap != null) {
|
||||||
|
loop: for (Map.Entry<Integer, TreeSet<String>> depthIds: depthIdsMap.entrySet()) {
|
||||||
for (String id: depthIds.getValue()) {
|
for (String id: depthIds.getValue()) {
|
||||||
dateIdResult.put(id, new String[]{hostport, Integer.toString(depthIds.getKey())});
|
dateIdResult.put(id, new String[]{hostport, Integer.toString(depthIds.getKey())});
|
||||||
if (order == Order.ANY && dateIdResult.size() >= maxcount) break loop;
|
if (order == Order.ANY && dateIdResult.size() >= maxcount) break loop;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (host != null && depth != null) {
|
if (host != null && depth != null) {
|
||||||
String hostport = pathToHostPortDir(host, 80);
|
String hostport = pathToHostPortDir(host, 80);
|
||||||
TreeMap<Integer, TreeSet<String>> domaindepth = this.directory.get(hostport);
|
TreeMap<Integer, TreeSet<String>> domaindepth = this.directory.get(hostport);
|
||||||
|
if(domaindepth == null && isIpv6AddrHost(host)) {
|
||||||
|
/* If the host is a raw IPV6 address, we check also if a snapshot was recorded with the old format (without percent-encoding) */
|
||||||
|
hostport = pathToHostPortDir(host, 80, false);
|
||||||
|
domaindepth = this.directory.get(hostport);
|
||||||
|
}
|
||||||
if (domaindepth != null) {
|
if (domaindepth != null) {
|
||||||
TreeSet<String> ids = domaindepth.get(depth);
|
TreeSet<String> ids = domaindepth.get(depth);
|
||||||
if (ids != null) loop: for (String id: ids) {
|
if (ids != null) loop: for (String id: ids) {
|
||||||
|
@ -430,6 +444,10 @@ public class Snapshots {
|
||||||
public Collection<File> findPaths(final DigestURL url, final int depth, final String ext) {
|
public Collection<File> findPaths(final DigestURL url, final int depth, final String ext) {
|
||||||
String id = ASCII.String(url.hash());
|
String id = ASCII.String(url.hash());
|
||||||
File pathToShard = pathToShard(url, depth);
|
File pathToShard = pathToShard(url, depth);
|
||||||
|
if(!pathToShard.exists() && isIpv6AddrHost(url.getHost())) {
|
||||||
|
/* If the host is a raw IPV6 address, we check also if a snapshot was recorded with the old format (without percent-encoding) */
|
||||||
|
pathToShard = pathToShard(pathToHostPortDir(url.getHost(), url.getPort(), false), ASCII.String(url.hash()), depth);
|
||||||
|
}
|
||||||
String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist
|
String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist
|
||||||
ArrayList<File> paths = new ArrayList<>();
|
ArrayList<File> paths = new ArrayList<>();
|
||||||
if (list != null) {
|
if (list != null) {
|
||||||
|
@ -451,8 +469,40 @@ public class Snapshots {
|
||||||
return pathToShard;
|
return pathToShard;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param host a domain name or IP address
|
||||||
|
* @return true when the host string is a raw IPV6 address (with square brackets)
|
||||||
|
*/
|
||||||
|
private boolean isIpv6AddrHost(final String host) {
|
||||||
|
return (host != null && host.startsWith("[") && host.endsWith("]") && host.contains(":"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param host a domain name or IP address
|
||||||
|
* @param port a port number
|
||||||
|
* @return a representation of the host and port encoding IPV6 addresses for better support across file systems (notably FAT or NTFS)
|
||||||
|
*/
|
||||||
private String pathToHostPortDir(final String host, final int port) {
|
private String pathToHostPortDir(final String host, final int port) {
|
||||||
return host + "." + port;
|
return pathToHostPortDir(host, port, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param host a domain name or IP address
|
||||||
|
* @param port a port number
|
||||||
|
* @param encodeIpv6 when true, encode the host for better support accross file systems (notably FAT or NTFS)
|
||||||
|
* @return a representation of the host and port
|
||||||
|
*/
|
||||||
|
private String pathToHostPortDir(final String host, final int port, final boolean encodeIpv6) {
|
||||||
|
String encodedHost = host;
|
||||||
|
if(encodeIpv6 && isIpv6AddrHost(host)) {
|
||||||
|
/* Percent-encode the host name when it is an IPV6 address, as the ':' character is illegal in a file name on MS Windows FAT32 and NTFS file systems */
|
||||||
|
try {
|
||||||
|
encodedHost = URLEncoder.encode(host, StandardCharsets.UTF_8.name());
|
||||||
|
} catch (final UnsupportedEncodingException e) {
|
||||||
|
/* This should not happen has UTF-8 encoding support is required for any JVM implementation */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return encodedHost + "." + port;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String pathToDepthDir(final int depth) {
|
private String pathToDepthDir(final int depth) {
|
||||||
|
|
|
@ -35,6 +35,7 @@ import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
@ -85,6 +86,15 @@ public class Transactions {
|
||||||
archive = new Snapshots(archiveDir);
|
archive = new Snapshots(archiveDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static synchronized void migrateIPV6Snapshots() {
|
||||||
|
executor.shutdown();
|
||||||
|
try {
|
||||||
|
executor.awaitTermination(10, TimeUnit.SECONDS);
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the number of entries for each of the transaction states
|
* get the number of entries for each of the transaction states
|
||||||
* @return the total number of different documents for each transaction state
|
* @return the total number of different documents for each transaction state
|
||||||
|
@ -118,7 +128,7 @@ public class Transactions {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* list the snapshots for a given host name
|
* list the snapshots for a given host name
|
||||||
* @param hostport the <host>.<port> identifier for the domain
|
* @param hostport the <host>.<port> identifier for the domain (with the same format as applied by the Snapshots.pathToHostPortDir() function).
|
||||||
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
|
* @param depth restrict the result to the given depth or if depth == -1 do not restrict to a depth
|
||||||
* @param state the wanted transaction state, State.INVENTORY, State.ARCHIVE or State.ANY
|
* @param state the wanted transaction state, State.INVENTORY, State.ARCHIVE or State.ANY
|
||||||
* @return a map with a set for each depth in the domain of the host name
|
* @return a map with a set for each depth in the domain of the host name
|
||||||
|
@ -199,7 +209,9 @@ public class Transactions {
|
||||||
// CLEAN UP OLD DATA (if wanted)
|
// CLEAN UP OLD DATA (if wanted)
|
||||||
Collection<File> oldPaths = Transactions.findPaths(url, depth, null, Transactions.State.INVENTORY);
|
Collection<File> oldPaths = Transactions.findPaths(url, depth, null, Transactions.State.INVENTORY);
|
||||||
if (replaceOld && oldPaths != null) {
|
if (replaceOld && oldPaths != null) {
|
||||||
for (File oldPath: oldPaths) oldPath.delete();
|
for (File oldPath: oldPaths) {
|
||||||
|
oldPath.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// STORE METADATA FOR THE IMAGE
|
// STORE METADATA FOR THE IMAGE
|
||||||
|
|
Loading…
Reference in New Issue
Block a user