diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java index 35189637a5..83cc98b49c 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java @@ -88,7 +88,7 @@ public class UniprotProxySequenceReader implements ProxySequ private static final String TREMBLID_PATTERN = "[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"; public static final Pattern UP_AC_PATTERN = Pattern.compile("(" + SPID_PATTERN + "|" + TREMBLID_PATTERN + ")"); - private static String uniprotbaseURL = "http://www.uniprot.org"; //"http://pir.uniprot.org"; + private static String uniprotbaseURL = "https://www.uniprot.org"; //"http://pir.uniprot.org"; private static String uniprotDirectoryCache = null; private String sequence; private CompoundSet compoundSet; @@ -414,6 +414,61 @@ private void writeCache(StringBuilder sb, String accession) throws IOException { fw.write(sb.toString()); fw.close(); } + + /** + * Open a URL connection. + * + * Follows redirects. + * @param url + * @throws IOException + */ + private static HttpURLConnection openURLConnection(URL url) throws IOException { + // This method should be moved to a utility class in BioJava 5.0 + + final int timeout = 5000; + final String useragent = "BioJava"; + + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty("User-Agent", useragent); + conn.setInstanceFollowRedirects(true); + conn.setConnectTimeout(timeout); + conn.setReadTimeout(timeout); + + int status = conn.getResponseCode(); + while (status == HttpURLConnection.HTTP_MOVED_TEMP + || status == HttpURLConnection.HTTP_MOVED_PERM + || status == HttpURLConnection.HTTP_SEE_OTHER) { + // Redirect! + String newUrl = conn.getHeaderField("Location"); + + if(newUrl.equals(url.toString())) { + throw new IOException("Cyclic redirect detected at "+newUrl); + } + + // Preserve cookies + String cookies = conn.getHeaderField("Set-Cookie"); + + // open the new connection again + url = new URL(newUrl); + conn.disconnect(); + conn = (HttpURLConnection) url.openConnection(); + if(cookies != null) { + conn.setRequestProperty("Cookie", cookies); + } + conn.addRequestProperty("User-Agent", useragent); + conn.setInstanceFollowRedirects(true); + conn.setConnectTimeout(timeout); + conn.setReadTimeout(timeout); + conn.connect(); + + status = conn.getResponseCode(); + + logger.info("Redirecting from {} to {}", url, newUrl); + } + conn.connect(); + + return conn; + } private StringBuilder fetchUniprotXML(String uniprotURL) throws IOException, CompoundNotFoundException { @@ -423,11 +478,9 @@ private StringBuilder fetchUniprotXML(String uniprotURL) int attempt = 5; List errorCodes = new ArrayList(); while(attempt > 0) { - HttpURLConnection uniprotConnection = (HttpURLConnection) uniprot.openConnection(); - uniprotConnection.setRequestProperty("User-Agent", "BioJava"); - uniprotConnection.connect(); + HttpURLConnection uniprotConnection = openURLConnection(uniprot); int statusCode = uniprotConnection.getResponseCode(); - if (statusCode == 200) { + if (statusCode == HttpURLConnection.HTTP_OK) { BufferedReader in = new BufferedReader( new InputStreamReader( uniprotConnection.getInputStream()));