/batik-1.8/sources/org/apache/batik/util/ParsedURLData.java

# · Java · 631 lines · 414 code · 77 blank · 140 comment · 194 complexity · 9cb099885d68f5167eb585e4e18cefda MD5 · raw file

  1. /*
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. */
  15. package org.apache.batik.util;
  16. import java.io.BufferedInputStream;
  17. import java.io.IOException;
  18. import java.io.InputStream;
  19. import java.net.HttpURLConnection;
  20. import java.net.MalformedURLException;
  21. import java.net.URL;
  22. import java.net.URLConnection;
  23. import java.util.Iterator;
  24. import java.util.LinkedList;
  25. import java.util.List;
  26. import java.util.zip.GZIPInputStream;
  27. import java.util.zip.InflaterInputStream;
  28. import java.util.zip.ZipException;
  29. /**
  30. * Holds the data for more URLs.
  31. *
  32. * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
  33. * @version $Id: ParsedURLData.java 1004896 2010-10-06 05:44:11Z helder $
  34. */
  35. public class ParsedURLData {
  36. protected static final String HTTP_USER_AGENT_HEADER = "User-Agent";
  37. protected static final String HTTP_ACCEPT_HEADER = "Accept";
  38. protected static final String HTTP_ACCEPT_LANGUAGE_HEADER = "Accept-Language";
  39. protected static final String HTTP_ACCEPT_ENCODING_HEADER = "Accept-Encoding";
  40. protected static List acceptedEncodings = new LinkedList();
  41. static {
  42. acceptedEncodings.add("gzip");
  43. }
  44. /**
  45. * GZIP header magic number bytes, like found in a gzipped
  46. * files, which are encoded in Intel format (i&#x2e;e&#x2e; little indian).
  47. */
  48. public static final byte[] GZIP_MAGIC = {(byte)0x1f, (byte)0x8b};
  49. /**
  50. * This is a utility function others can call that checks if
  51. * is is a GZIP stream if so it returns a GZIPInputStream that
  52. * will decode the contents, otherwise it returns (or a
  53. * buffered version of is) untouched.
  54. * @param is Stream that may potentially be a GZIP stream.
  55. */
  56. public static InputStream checkGZIP(InputStream is)
  57. throws IOException {
  58. if (!is.markSupported())
  59. is = new BufferedInputStream(is);
  60. byte[] data = new byte[2];
  61. try {
  62. is.mark(2);
  63. is.read(data);
  64. is.reset();
  65. } catch (Exception ex) {
  66. is.reset();
  67. return is;
  68. }
  69. if ((data[0] == GZIP_MAGIC[0]) &&
  70. (data[1] == GZIP_MAGIC[1]))
  71. return new GZIPInputStream(is);
  72. if (((data[0]&0x0F) == 8) &&
  73. ((data[0]>>>4) <= 7)) {
  74. // Check for a zlib (deflate) stream
  75. int chk = ((((int)data[0])&0xFF)*256+
  76. (((int)data[1])&0xFF));
  77. if ((chk %31) == 0) {
  78. try {
  79. // I'm not really as certain of this check
  80. // as I would like so I want to force it
  81. // to decode part of the stream.
  82. is.mark(100);
  83. InputStream ret = new InflaterInputStream(is);
  84. if (!ret.markSupported())
  85. ret = new BufferedInputStream(ret);
  86. ret.mark(2);
  87. ret.read(data);
  88. is.reset();
  89. ret = new InflaterInputStream(is);
  90. return ret;
  91. } catch (ZipException ze) {
  92. is.reset();
  93. return is;
  94. }
  95. }
  96. }
  97. return is;
  98. }
  99. /**
  100. * Since the Data instance is 'hidden' in the ParsedURL
  101. * instance we make all our methods public. This makes it
  102. * easy for the various Protocol Handlers to update an
  103. * instance as parsing proceeds.
  104. */
  105. public String protocol = null;
  106. public String host = null;
  107. public int port = -1;
  108. public String path = null;
  109. public String ref = null;
  110. public String contentType = null;
  111. public String contentEncoding = null;
  112. public InputStream stream = null;
  113. public boolean hasBeenOpened = false;
  114. /**
  115. * The extracted type/subtype from the Content-Type header.
  116. */
  117. protected String contentTypeMediaType;
  118. /**
  119. * The extracted charset parameter from the Content-Type header.
  120. */
  121. protected String contentTypeCharset;
  122. /**
  123. * The URL that was ultimately used to fetch the resource.
  124. */
  125. protected URL postConnectionURL;
  126. /**
  127. * Void constructor
  128. */
  129. public ParsedURLData() {
  130. }
  131. /**
  132. * Build from an existing URL.
  133. */
  134. public ParsedURLData(URL url) {
  135. protocol = url.getProtocol();
  136. if ((protocol != null) && (protocol.length() == 0))
  137. protocol = null;
  138. host = url.getHost();
  139. if ((host != null) && (host.length() == 0))
  140. host = null;
  141. port = url.getPort();
  142. path = url.getFile();
  143. if ((path != null) && (path.length() == 0))
  144. path = null;
  145. ref = url.getRef();
  146. if ((ref != null) && (ref.length() == 0))
  147. ref = null;
  148. }
  149. /**
  150. * Attempts to build a normal java.net.URL instance from this
  151. * URL.
  152. */
  153. protected URL buildURL() throws MalformedURLException {
  154. // System.out.println("File: " + file);
  155. // if (ref != null)
  156. // file += "#" + ref;
  157. // System.err.println("Building: " + protocol + " - " +
  158. // host + " - " + path);
  159. if ((protocol != null) && (host != null)) {
  160. String file = "";
  161. if (path != null)
  162. file = path;
  163. if (port == -1)
  164. return new URL(protocol, host, file);
  165. return new URL(protocol, host, port, file);
  166. }
  167. return new URL(toString());
  168. }
  169. /**
  170. * Implement Object.hashCode.
  171. */
  172. public int hashCode() {
  173. int hc = port;
  174. if (protocol != null)
  175. hc ^= protocol.hashCode();
  176. if (host != null)
  177. hc ^= host.hashCode();
  178. // For some URLs path and ref can get fairly long
  179. // and the most unique part is towards the end
  180. // so we grab that part for HC purposes
  181. if (path != null) {
  182. int len = path.length();
  183. if (len > 20)
  184. hc ^= path.substring(len-20).hashCode();
  185. else
  186. hc ^= path.hashCode();
  187. }
  188. if (ref != null) {
  189. int len = ref.length();
  190. if (len > 20)
  191. hc ^= ref.substring(len-20).hashCode();
  192. else
  193. hc ^= ref.hashCode();
  194. }
  195. return hc;
  196. }
  197. /**
  198. * Implement Object.equals for ParsedURLData.
  199. */
  200. public boolean equals(Object obj) {
  201. if (obj == null) return false;
  202. if (! (obj instanceof ParsedURLData))
  203. return false;
  204. ParsedURLData ud = (ParsedURLData)obj;
  205. if (ud.port != port)
  206. return false;
  207. if (ud.protocol==null) {
  208. if (protocol != null)
  209. return false;
  210. } else if (protocol == null)
  211. return false;
  212. else if (!ud.protocol.equals(protocol))
  213. return false;
  214. if (ud.host==null) {
  215. if (host !=null)
  216. return false;
  217. } else if (host == null)
  218. return false;
  219. else if (!ud.host.equals(host))
  220. return false;
  221. if (ud.ref==null) {
  222. if (ref !=null)
  223. return false;
  224. } else if (ref == null)
  225. return false;
  226. else if (!ud.ref.equals(ref))
  227. return false;
  228. if (ud.path==null) {
  229. if (path !=null)
  230. return false;
  231. } else if (path == null)
  232. return false;
  233. else if (!ud.path.equals(path))
  234. return false;
  235. return true;
  236. }
  237. /**
  238. * Returns the content type if available. This is only available
  239. * for some protocols.
  240. */
  241. public String getContentType(String userAgent) {
  242. if (contentType != null)
  243. return contentType;
  244. if (!hasBeenOpened) {
  245. try {
  246. openStreamInternal(userAgent, null, null);
  247. } catch (IOException ioe) { /* nothing */ }
  248. }
  249. return contentType;
  250. }
  251. /**
  252. * Returns the content type's type/subtype, if available. This is
  253. * only available for some protocols.
  254. */
  255. public String getContentTypeMediaType(String userAgent) {
  256. if (contentTypeMediaType != null) {
  257. return contentTypeMediaType;
  258. }
  259. extractContentTypeParts(userAgent);
  260. return contentTypeMediaType;
  261. }
  262. /**
  263. * Returns the content type's charset parameter, if available. This is
  264. * only available for some protocols.
  265. */
  266. public String getContentTypeCharset(String userAgent) {
  267. if (contentTypeMediaType != null) {
  268. return contentTypeCharset;
  269. }
  270. extractContentTypeParts(userAgent);
  271. return contentTypeCharset;
  272. }
  273. /**
  274. * Returns whether the Content-Type header has the given parameter.
  275. */
  276. public boolean hasContentTypeParameter(String userAgent, String param) {
  277. getContentType(userAgent);
  278. if (contentType == null) {
  279. return false;
  280. }
  281. int i = 0;
  282. int len = contentType.length();
  283. int plen = param.length();
  284. loop1: while (i < len) {
  285. switch (contentType.charAt(i)) {
  286. case ' ':
  287. case ';':
  288. break loop1;
  289. }
  290. i++;
  291. }
  292. if (i == len) {
  293. contentTypeMediaType = contentType;
  294. } else {
  295. contentTypeMediaType = contentType.substring(0, i);
  296. }
  297. loop2: for (;;) {
  298. while (i < len && contentType.charAt(i) != ';') {
  299. i++;
  300. }
  301. if (i == len) {
  302. return false;
  303. }
  304. i++;
  305. while (i < len && contentType.charAt(i) == ' ') {
  306. i++;
  307. }
  308. if (i >= len - plen - 1) {
  309. return false;
  310. }
  311. for (int j = 0; j < plen; j++) {
  312. if (!(contentType.charAt(i++) == param.charAt(j))) {
  313. continue loop2;
  314. }
  315. }
  316. if (contentType.charAt(i) == '=') {
  317. return true;
  318. }
  319. }
  320. }
  321. /**
  322. * Extracts the type/subtype and charset parameter from the Content-Type
  323. * header.
  324. */
  325. protected void extractContentTypeParts(String userAgent) {
  326. getContentType(userAgent);
  327. if (contentType == null) {
  328. return;
  329. }
  330. int i = 0;
  331. int len = contentType.length();
  332. loop1: while (i < len) {
  333. switch (contentType.charAt(i)) {
  334. case ' ':
  335. case ';':
  336. break loop1;
  337. }
  338. i++;
  339. }
  340. if (i == len) {
  341. contentTypeMediaType = contentType;
  342. } else {
  343. contentTypeMediaType = contentType.substring(0, i);
  344. }
  345. for (;;) {
  346. while (i < len && contentType.charAt(i) != ';') {
  347. i++;
  348. }
  349. if (i == len) {
  350. return;
  351. }
  352. i++;
  353. while (i < len && contentType.charAt(i) == ' ') {
  354. i++;
  355. }
  356. if (i >= len - 8) {
  357. return;
  358. }
  359. if (contentType.charAt(i++) == 'c') {
  360. if (contentType.charAt(i++) != 'h') continue;
  361. if (contentType.charAt(i++) != 'a') continue;
  362. if (contentType.charAt(i++) != 'r') continue;
  363. if (contentType.charAt(i++) != 's') continue;
  364. if (contentType.charAt(i++) != 'e') continue;
  365. if (contentType.charAt(i++) != 't') continue;
  366. if (contentType.charAt(i++) != '=') continue;
  367. int j = i;
  368. loop2: while (i < len) {
  369. switch (contentType.charAt(i)) {
  370. case ' ':
  371. case ';':
  372. break loop2;
  373. }
  374. i++;
  375. }
  376. contentTypeCharset = contentType.substring(j, i);
  377. return;
  378. }
  379. }
  380. }
  381. /**
  382. * Returns the content encoding if available. This is only available
  383. * for some protocols.
  384. */
  385. public String getContentEncoding(String userAgent) {
  386. if (contentEncoding != null)
  387. return contentEncoding;
  388. if (!hasBeenOpened) {
  389. try {
  390. openStreamInternal(userAgent, null, null);
  391. } catch (IOException ioe) { /* nothing */ }
  392. }
  393. return contentEncoding;
  394. }
  395. /**
  396. * Returns true if the URL looks well formed and complete.
  397. * This does not garuntee that the stream can be opened but
  398. * is a good indication that things aren't totally messed up.
  399. */
  400. public boolean complete() {
  401. try {
  402. buildURL();
  403. } catch (MalformedURLException mue) {
  404. return false;
  405. }
  406. return true;
  407. }
  408. /**
  409. * Open the stream and check for common compression types. If
  410. * the stream is found to be compressed with a standard
  411. * compression type it is automatically decompressed.
  412. * @param userAgent The user agent opening the stream (may be null).
  413. * @param mimeTypes The expected mime types of the content
  414. * in the returned InputStream (mapped to Http accept
  415. * header among other possability). The elements of
  416. * the iterator must be strings (may be null)
  417. */
  418. public InputStream openStream(String userAgent, Iterator mimeTypes)
  419. throws IOException {
  420. InputStream raw = openStreamInternal(userAgent, mimeTypes,
  421. acceptedEncodings.iterator());
  422. if (raw == null)
  423. return null;
  424. stream = null;
  425. return checkGZIP(raw);
  426. }
  427. /**
  428. * Open the stream and returns it. No checks are made to see
  429. * if the stream is compressed or encoded in any way.
  430. * @param userAgent The user agent opening the stream (may be null).
  431. * @param mimeTypes The expected mime types of the content
  432. * in the returned InputStream (mapped to Http accept
  433. * header among other possability). The elements of
  434. * the iterator must be strings (may be null)
  435. */
  436. public InputStream openStreamRaw(String userAgent, Iterator mimeTypes)
  437. throws IOException {
  438. InputStream ret = openStreamInternal(userAgent, mimeTypes, null);
  439. stream = null;
  440. return ret;
  441. }
  442. protected InputStream openStreamInternal(String userAgent,
  443. Iterator mimeTypes,
  444. Iterator encodingTypes)
  445. throws IOException {
  446. if (stream != null)
  447. return stream;
  448. hasBeenOpened = true;
  449. URL url = null;
  450. try {
  451. url = buildURL();
  452. } catch (MalformedURLException mue) {
  453. throw new IOException
  454. ("Unable to make sense of URL for connection");
  455. }
  456. if (url == null)
  457. return null;
  458. URLConnection urlC = url.openConnection();
  459. if (urlC instanceof HttpURLConnection) {
  460. if (userAgent != null)
  461. urlC.setRequestProperty(HTTP_USER_AGENT_HEADER, userAgent);
  462. if (mimeTypes != null) {
  463. String acceptHeader = "";
  464. while (mimeTypes.hasNext()) {
  465. acceptHeader += mimeTypes.next();
  466. if (mimeTypes.hasNext())
  467. acceptHeader += ",";
  468. }
  469. urlC.setRequestProperty(HTTP_ACCEPT_HEADER, acceptHeader);
  470. }
  471. if (encodingTypes != null) {
  472. String encodingHeader = "";
  473. while (encodingTypes.hasNext()) {
  474. encodingHeader += encodingTypes.next();
  475. if (encodingTypes.hasNext())
  476. encodingHeader += ",";
  477. }
  478. urlC.setRequestProperty(HTTP_ACCEPT_ENCODING_HEADER,
  479. encodingHeader);
  480. }
  481. contentType = urlC.getContentType();
  482. contentEncoding = urlC.getContentEncoding();
  483. postConnectionURL = urlC.getURL();
  484. }
  485. try {
  486. return (stream = urlC.getInputStream());
  487. } catch (IOException e) {
  488. if (urlC instanceof HttpURLConnection) {
  489. // bug 49889: if available, return the error stream
  490. // (allow interpretation of content in the HTTP error response)
  491. return (stream = ((HttpURLConnection) urlC).getErrorStream());
  492. } else {
  493. throw e;
  494. }
  495. }
  496. }
  497. /**
  498. * Returns the URL up to and include the port number on
  499. * the host. Does not include the path or fragment pieces.
  500. */
  501. public String getPortStr() {
  502. String portStr ="";
  503. if (protocol != null)
  504. portStr += protocol + ":";
  505. if ((host != null) || (port != -1)) {
  506. portStr += "//";
  507. if (host != null) portStr += host;
  508. if (port != -1) portStr += ":" + port;
  509. }
  510. return portStr;
  511. }
  512. protected boolean sameFile(ParsedURLData other) {
  513. if (this == other) return true;
  514. // Check if the rest of the two PURLs matche other than
  515. // the 'ref'
  516. if ((port == other.port) &&
  517. ((path == other.path)
  518. || ((path!=null) && path.equals(other.path))) &&
  519. ((host == other.host)
  520. || ((host!=null) && host.equals(other.host))) &&
  521. ((protocol == other.protocol)
  522. || ((protocol!=null) && protocol.equals(other.protocol))))
  523. return true;
  524. return false;
  525. }
  526. /**
  527. * Return a string representation of the data.
  528. */
  529. public String toString() {
  530. String ret = getPortStr();
  531. if (path != null)
  532. ret += path;
  533. if (ref != null)
  534. ret += "#" + ref;
  535. return ret;
  536. }
  537. /**
  538. * Returns the URL that was ultimately used to fetch the resource
  539. * represented by the <code>ParsedURL</code>.
  540. */
  541. public String getPostConnectionURL() {
  542. if (postConnectionURL != null) {
  543. if (ref != null) {
  544. return postConnectionURL.toString() + '#' + ref;
  545. }
  546. return postConnectionURL.toString();
  547. }
  548. return toString();
  549. }
  550. }