/batik-1.8/sources/org/apache/batik/util/ParsedURLData.java
Java | 631 lines | 414 code | 77 blank | 140 comment | 194 complexity | 9cb099885d68f5167eb585e4e18cefda MD5 | raw file
Possible License(s): Apache-2.0, GPL-2.0, IPL-1.0
- /*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package org.apache.batik.util;
- import java.io.BufferedInputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.net.HttpURLConnection;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.Iterator;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.zip.GZIPInputStream;
- import java.util.zip.InflaterInputStream;
- import java.util.zip.ZipException;
/**
 * Holds the parsed pieces of a URL (protocol, host, port, path and
 * fragment reference) together with the state that results from opening
 * it (content type, content encoding and the opened stream).
 *
 * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
 * @version $Id: ParsedURLData.java 1004896 2010-10-06 05:44:11Z helder $
 */
public class ParsedURLData {

    protected static final String HTTP_USER_AGENT_HEADER      = "User-Agent";
    protected static final String HTTP_ACCEPT_HEADER          = "Accept";
    protected static final String HTTP_ACCEPT_LANGUAGE_HEADER = "Accept-Language";
    protected static final String HTTP_ACCEPT_ENCODING_HEADER = "Accept-Encoding";

    /**
     * The content encodings advertised in the Accept-Encoding header by
     * {@link #openStream(String, Iterator)}.  Elements are strings.
     */
    protected static List acceptedEncodings = new LinkedList();
    static {
        acceptedEncodings.add("gzip");
    }

    /**
     * GZIP header magic number bytes, as found at the start of a gzipped
     * file, which are encoded in Intel format (i.e. little endian).
     */
    public static final byte[] GZIP_MAGIC = {(byte)0x1f, (byte)0x8b};

    /**
     * Read-ahead limit requested from the source stream while probing a
     * suspected zlib stream.  NOTE(review): chosen to cover one read-ahead
     * chunk of InflaterInputStream (512 bytes in the reference JDK
     * implementation) - confirm if another class library is targeted.
     */
    private static final int ZLIB_PROBE_LIMIT = 512;

    /**
     * Maximum number of decoded bytes requested while probing a suspected
     * zlib stream.
     */
    private static final int ZLIB_PROBE_OUTPUT = 8192;

    /**
     * This is a utility function others can call that checks if
     * <code>is</code> is a GZIP or zlib (deflate) stream.  If so it
     * returns a stream that will decode the contents, otherwise it
     * returns <code>is</code> (or a buffered version of it) repositioned
     * at its first byte.
     *
     * @param is Stream that may potentially be a GZIP stream.
     * @return a decoding stream, or the (possibly buffered) original.
     * @throws IOException if the stream can not be read or repositioned.
     */
    public static InputStream checkGZIP(InputStream is)
        throws IOException {

        if (!is.markSupported())
            is = new BufferedInputStream(is);

        byte[] data = new byte[2];
        int got;
        try {
            is.mark(2);
            // A single read() may legally deliver just one byte, so keep
            // reading until both header bytes are in or the stream ends.
            got = readFully(is, data);
            is.reset();
        } catch (IOException ex) {
            is.reset();
            return is;
        }

        if (got < data.length)
            return is;  // too short to carry any recognised header

        if ((data[0] == GZIP_MAGIC[0]) &&
            (data[1] == GZIP_MAGIC[1]))
            return new GZIPInputStream(is);

        if (isZlibHeader(data[0], data[1])) {
            try {
                // The header check alone is weak (plain text can satisfy
                // it), so force part of the stream to be decoded before
                // committing to inflating it.
                is.mark(ZLIB_PROBE_LIMIT);
                byte[] scratch = new byte[ZLIB_PROBE_OUTPUT];
                new InflaterInputStream(is).read(scratch);
                is.reset();
                return new InflaterInputStream(is);
            } catch (ZipException ze) {
                is.reset();
                return is;
            }
        }

        return is;
    }

    /**
     * Reads from <code>in</code> until <code>buf</code> is full or the end
     * of the stream is reached.
     *
     * @return the number of bytes actually stored in <code>buf</code>.
     */
    private static int readFully(InputStream in, byte[] buf)
        throws IOException {
        int total = 0;
        while (total < buf.length) {
            int n = in.read(buf, total, buf.length - total);
            if (n < 0)
                break;
            total += n;
        }
        return total;
    }

    /**
     * Tells whether the two bytes form a zlib stream header that a plain
     * inflater can decode: compression method 8 (deflate), a window size
     * code of at most 7, a header checksum that is a multiple of 31 and no
     * preset dictionary requested.
     */
    private static boolean isZlibHeader(byte first, byte second) {
        int cmf = first  & 0xFF;
        int flg = second & 0xFF;
        if ((cmf & 0x0F) != 8)
            return false;
        if ((cmf >>> 4) > 7)
            return false;
        if ((((cmf << 8) | flg) % 31) != 0)
            return false;
        // With the preset dictionary flag set the inflater would stop and
        // report end-of-stream without producing any data, silently
        // discarding the content, so such a header is not treated as zlib.
        return (flg & 0x20) == 0;
    }

    /**
     * Since the Data instance is 'hidden' in the ParsedURL
     * instance we make all our fields and methods public.  This makes
     * it easy for the various Protocol Handlers to update an
     * instance as parsing proceeds.
     */
    public String protocol        = null;
    public String host            = null;
    public int    port            = -1;
    public String path            = null;
    public String ref             = null;
    public String contentType     = null;
    public String contentEncoding = null;

    public InputStream stream     = null;
    public boolean hasBeenOpened  = false;

    /**
     * The extracted type/subtype from the Content-Type header.
     */
    protected String contentTypeMediaType;

    /**
     * The extracted charset parameter from the Content-Type header.
     */
    protected String contentTypeCharset;

    /**
     * The URL that was ultimately used to fetch the resource.
     */
    protected URL postConnectionURL;

    /**
     * Void constructor
     */
    public ParsedURLData() {
    }

    /**
     * Build from an existing URL.  Empty components are stored as null.
     */
    public ParsedURLData(URL url) {
        protocol = emptyToNull(url.getProtocol());
        host     = emptyToNull(url.getHost());
        port     = url.getPort();
        path     = emptyToNull(url.getFile());
        ref      = emptyToNull(url.getRef());
    }

    /**
     * Maps the empty string to null and returns anything else unchanged.
     */
    private static String emptyToNull(String s) {
        return ((s != null) && (s.length() == 0)) ? null : s;
    }

    /**
     * Attempts to build a normal java.net.URL instance from this
     * URL.  When both protocol and host are known the URL is built from
     * the protocol, host, port and path; otherwise it is parsed from
     * {@link #toString()}.
     *
     * @throws MalformedURLException if no URL can be built.
     */
    protected URL buildURL() throws MalformedURLException {
        if ((protocol == null) || (host == null))
            return new URL(toString());

        String file = (path != null) ? path : "";
        if (port == -1)
            return new URL(protocol, host, file);
        return new URL(protocol, host, port, file);
    }

    /**
     * Implement Object.hashCode.
     */
    public int hashCode() {
        int hc = port;
        if (protocol != null)
            hc ^= protocol.hashCode();
        if (host != null)
            hc ^= host.hashCode();

        // For some URLs path and ref can get fairly long
        // and the most unique part is towards the end
        // so we grab that part for HC purposes
        hc ^= tailHash(path);
        hc ^= tailHash(ref);
        return hc;
    }

    /**
     * Hash code of the last 20 characters of <code>s</code> (of all of it
     * when shorter), or 0 when <code>s</code> is null.
     */
    private static int tailHash(String s) {
        if (s == null)
            return 0;
        int len = s.length();
        return (len > 20) ? s.substring(len - 20).hashCode() : s.hashCode();
    }

    /**
     * Implement Object.equals for ParsedURLData.  Two instances are equal
     * when port, protocol, host, ref and path all match.
     */
    public boolean equals(Object obj) {
        if (!(obj instanceof ParsedURLData))
            return false;

        ParsedURLData ud = (ParsedURLData)obj;
        return (ud.port == port)
            && sameString(ud.protocol, protocol)
            && sameString(ud.host,     host)
            && sameString(ud.ref,      ref)
            && sameString(ud.path,     path);
    }

    /**
     * Null-safe string comparison: true when both are null or both are
     * equal strings.
     */
    private static boolean sameString(String a, String b) {
        return (a == null) ? (b == null) : a.equals(b);
    }

    /**
     * Returns the content type if available.  This is only available
     * for some protocols.  If it is not known yet and the URL has not
     * been opened, an attempt is made to open it.
     */
    public String getContentType(String userAgent) {
        if (contentType != null)
            return contentType;

        if (!hasBeenOpened) {
            try {
                openStreamInternal(userAgent, null, null);
            } catch (IOException ignored) {
                // best effort: the content type simply stays unknown
            }
        }

        return contentType;
    }

    /**
     * Returns the content type's type/subtype, if available.  This is
     * only available for some protocols.
     */
    public String getContentTypeMediaType(String userAgent) {
        if (contentTypeMediaType != null) {
            return contentTypeMediaType;
        }

        extractContentTypeParts(userAgent);

        return contentTypeMediaType;
    }

    /**
     * Returns the content type's charset parameter, if available.  This is
     * only available for some protocols.
     */
    public String getContentTypeCharset(String userAgent) {
        // A known media type means the Content-Type header was already
        // split into its parts.
        if (contentTypeMediaType != null) {
            return contentTypeCharset;
        }

        extractContentTypeParts(userAgent);

        return contentTypeCharset;
    }

    /**
     * Returns whether the Content-Type header has the given parameter.
     * The name is compared case-sensitively and must be followed by '='
     * and at least one more character.
     */
    public boolean hasContentTypeParameter(String userAgent, String param) {
        getContentType(userAgent);
        if (contentType == null) {
            return false;
        }
        // Record the media type and the charset together so that a later
        // getContentTypeCharset() call does not find a half-parsed header.
        parseContentTypeParts();
        return findParameterValue(contentType, param) != -1;
    }

    /**
     * Extracts the type/subtype and charset parameter from the Content-Type
     * header.
     */
    protected void extractContentTypeParts(String userAgent) {
        getContentType(userAgent);
        if (contentType == null) {
            return;
        }
        parseContentTypeParts();
    }

    /**
     * Splits the non-null <code>contentType</code> into
     * <code>contentTypeMediaType</code> and, when a charset parameter is
     * present, <code>contentTypeCharset</code>.
     */
    private void parseContentTypeParts() {
        String ct = contentType;
        contentTypeMediaType = ct.substring(0, tokenEnd(ct, 0));

        int start = findParameterValue(ct, "charset");
        if (start != -1) {
            contentTypeCharset = ct.substring(start, tokenEnd(ct, start));
        }
    }

    /**
     * Returns the index of the first ' ' or ';' at or after
     * <code>from</code>, or the length of <code>s</code> if there is none.
     */
    private static int tokenEnd(String s, int from) {
        int len = s.length();
        int i = from;
        while (i < len) {
            char c = s.charAt(i);
            if ((c == ' ') || (c == ';'))
                break;
            i++;
        }
        return i;
    }

    /**
     * Looks for the first parameter called <code>name</code> in the
     * Content-Type value <code>ct</code>.
     *
     * @return the index of the first character of the parameter's value,
     *         or -1 if no such parameter is present.
     */
    private static int findParameterValue(String ct, String name) {
        int len     = ct.length();
        int nameLen = name.length();
        int i = 0;
        for (;;) {
            // every parameter is introduced by a ';'
            i = ct.indexOf(';', i);
            if (i == -1) {
                return -1;
            }
            i++;
            while (i < len && ct.charAt(i) == ' ') {
                i++;
            }
            // need room for the name, the '=' and one value character
            if (i >= len - nameLen - 1) {
                return -1;
            }
            if (ct.startsWith(name, i) && ct.charAt(i + nameLen) == '=') {
                return i + nameLen + 1;
            }
            // No match: the search resumes from the start of this
            // parameter, so a ';' immediately following a partial match
            // still introduces the next parameter.
        }
    }

    /**
     * Returns the content encoding if available.  This is only available
     * for some protocols.  If it is not known yet and the URL has not
     * been opened, an attempt is made to open it.
     */
    public String getContentEncoding(String userAgent) {
        if (contentEncoding != null)
            return contentEncoding;

        if (!hasBeenOpened) {
            try {
                openStreamInternal(userAgent, null, null);
            } catch (IOException ignored) {
                // best effort: the content encoding simply stays unknown
            }
        }

        return contentEncoding;
    }

    /**
     * Returns true if the URL looks well formed and complete.
     * This does not guarantee that the stream can be opened but
     * is a good indication that things aren't totally messed up.
     */
    public boolean complete() {
        try {
            buildURL();
        } catch (MalformedURLException mue) {
            return false;
        }
        return true;
    }

    /**
     * Open the stream and check for common compression types.  If
     * the stream is found to be compressed with a standard
     * compression type it is automatically decompressed.
     *
     * @param userAgent The user agent opening the stream (may be null).
     * @param mimeTypes The expected mime types of the content
     *        in the returned InputStream (mapped to Http accept
     *        header among other possibilities).  The elements of
     *        the iterator must be strings (may be null)
     */
    public InputStream openStream(String userAgent, Iterator mimeTypes)
        throws IOException {
        InputStream raw = openStreamInternal(userAgent, mimeTypes,
                                             acceptedEncodings.iterator());
        if (raw == null)
            return null;
        // The stream is handed over to the caller; drop our reference.
        stream = null;

        return checkGZIP(raw);
    }

    /**
     * Open the stream and returns it.  No checks are made to see
     * if the stream is compressed or encoded in any way.
     *
     * @param userAgent The user agent opening the stream (may be null).
     * @param mimeTypes The expected mime types of the content
     *        in the returned InputStream (mapped to Http accept
     *        header among other possibilities).  The elements of
     *        the iterator must be strings (may be null)
     */
    public InputStream openStreamRaw(String userAgent, Iterator mimeTypes)
        throws IOException {

        InputStream ret = openStreamInternal(userAgent, mimeTypes, null);
        // The stream is handed over to the caller; drop our reference.
        stream = null;
        return ret;
    }

    /**
     * Opens a connection to the URL, unless a stream is already held, and
     * returns its stream, remembering it in <code>stream</code>.  For HTTP
     * connections the request headers are set from the arguments and the
     * content type, content encoding and final URL are recorded.
     *
     * @param userAgent     value for the User-Agent header (may be null).
     * @param mimeTypes     values for the Accept header (may be null).
     * @param encodingTypes values for the Accept-Encoding header
     *                      (may be null).
     * @return the opened stream; for a failed HTTP request the error
     *         stream, when the connection provides one.
     * @throws IOException if the URL is malformed or the stream can not
     *         be opened and no HTTP error stream is available.
     */
    protected InputStream openStreamInternal(String userAgent,
                                             Iterator mimeTypes,
                                             Iterator encodingTypes)
        throws IOException {
        if (stream != null)
            return stream;

        hasBeenOpened = true;

        URL url = null;
        try {
            url = buildURL();
        } catch (MalformedURLException mue) {
            IOException ioe = new IOException
                ("Unable to make sense of URL for connection");
            ioe.initCause(mue);
            throw ioe;
        }

        if (url == null)
            return null;

        URLConnection urlC = url.openConnection();
        if (urlC instanceof HttpURLConnection) {
            if (userAgent != null)
                urlC.setRequestProperty(HTTP_USER_AGENT_HEADER, userAgent);

            if (mimeTypes != null)
                urlC.setRequestProperty(HTTP_ACCEPT_HEADER,
                                        joinWithCommas(mimeTypes));

            if (encodingTypes != null)
                urlC.setRequestProperty(HTTP_ACCEPT_ENCODING_HEADER,
                                        joinWithCommas(encodingTypes));

            contentType       = urlC.getContentType();
            contentEncoding   = urlC.getContentEncoding();
            postConnectionURL = urlC.getURL();
        }

        try {
            return (stream = urlC.getInputStream());
        } catch (IOException e) {
            if (urlC instanceof HttpURLConnection) {
                // bug 49889: if available, return the error stream
                // (allow interpretation of content in the HTTP error response)
                InputStream errorStream =
                    ((HttpURLConnection) urlC).getErrorStream();
                if (errorStream != null)
                    return (stream = errorStream);
            }
            // No error body to hand back: report the failure instead of
            // silently returning null.
            throw e;
        }
    }

    /**
     * Concatenates the remaining elements of <code>items</code>,
     * separated by commas.
     */
    private static String joinWithCommas(Iterator items) {
        StringBuffer joined = new StringBuffer();
        while (items.hasNext()) {
            joined.append(items.next());
            if (items.hasNext())
                joined.append(',');
        }
        return joined.toString();
    }

    /**
     * Returns the URL up to and including the port number on
     * the host.  Does not include the path or fragment pieces.
     */
    public String getPortStr() {
        StringBuffer portStr = new StringBuffer();
        if (protocol != null)
            portStr.append(protocol).append(':');

        if ((host != null) || (port != -1)) {
            portStr.append("//");
            if (host != null) portStr.append(host);
            if (port != -1)   portStr.append(':').append(port);
        }

        return portStr.toString();
    }

    /**
     * Returns true if <code>other</code> matches this instance in
     * everything except, possibly, the 'ref'.
     */
    protected boolean sameFile(ParsedURLData other) {
        if (this == other) return true;

        return (port == other.port)
            && sameString(path,     other.path)
            && sameString(host,     other.host)
            && sameString(protocol, other.protocol);
    }

    /**
     * Return a string representation of the data.
     */
    public String toString() {
        String ret = getPortStr();
        if (path != null)
            ret += path;

        if (ref != null)
            ret += "#" + ref;

        return ret;
    }

    /**
     * Returns the URL that was ultimately used to fetch the resource
     * represented by the <code>ParsedURL</code>.  Falls back to
     * {@link #toString()} when no connection has recorded one.
     */
    public String getPostConnectionURL() {
        if (postConnectionURL != null) {
            if (ref != null) {
                return postConnectionURL.toString() + '#' + ref;
            }
            return postConnectionURL.toString();
        }
        return toString();
    }
}