PageRenderTime 54ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/eclipse-wtp-webservices-R3.4.0/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java

#
Java | 296 lines | 212 code | 24 blank | 60 comment | 35 complexity | 81e85af995faaa15f2f80463bd935da4 MD5 | raw file
  1. /*******************************************************************************
  2. * Copyright (c) 2001, 2006 IBM Corporation and others.
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the Eclipse Public License v1.0
  5. * which accompanies this distribution, and is available at
  6. * http://www.eclipse.org/legal/epl-v10.html
  7. *
  8. * Contributors:
  9. * IBM Corporation - initial API and implementation
  10. * yyyymmdd bug Email and other contact information
  11. * -------- -------- -----------------------------------------------------------
  12. * 20060517 142324 rsinha@ca.ibm.com - Rupam Kuehner
  13. *******************************************************************************/
  14. package org.eclipse.wst.ws.internal.parser.wsil;
  15. import java.io.UnsupportedEncodingException;
  16. import java.util.Vector;
  17. import org.xml.sax.Attributes;
  18. import org.xml.sax.SAXException;
  19. import org.xml.sax.SAXParseException;
  20. import org.xml.sax.helpers.DefaultHandler;
  21. public class HTMLHeadHandler extends DefaultHandler
  22. {
  23. private final char START_TAG = '<';
  24. private final char END_TAG = '>';
  25. private final String HEAD_START_TAG = "<head>";
  26. private final String HEAD_END_TAG = "</head>";
  27. private final String ROOT_START_TAG = "<root>";
  28. private final String ROOT_END_TAG = "</root>";
  29. private final String UTF8 = "UTF-8";
  30. //HTML META tag information used to detect the charset.
  31. private final String HTML_CONTENT = "content";
  32. private final String HTTP_EQUIV = "http-equiv";
  33. private final String HTTP_EQUIV_CONTENT_TYPE = "Content-Type";
  34. private final String CHARSET = "charset";
  35. // WSIL tag information.
  36. private final String META = "meta";
  37. private final String NAME = "name";
  38. private final String SERVICE_INSPECTION = "serviceInspection";
  39. private final String CONTENT = "content";
  40. // DISCO tag information.
  41. private final String LINK = "link";
  42. private final String TYPE = "type";
  43. private final String TEXT_XML = "text/xml";
  44. private final String REL = "rel";
  45. private final String ALTERNATE = "alternate";
  46. private final String HREF = "href";
  47. private String baseURI_;
  48. private Vector wsils_;
  49. private Vector discos_;
  50. private String byteEncoding = UTF8; //Default to UTF-8.
  51. public HTMLHeadHandler(String baseURI)
  52. {
  53. super();
  54. baseURI_ = baseURI;
  55. wsils_ = new Vector();
  56. discos_ = new Vector();
  57. }
  58. public String[] getWsils()
  59. {
  60. String[] wsils = new String[wsils_.size()];
  61. wsils_.copyInto(wsils);
  62. return wsils;
  63. }
  64. public String[] getDiscos()
  65. {
  66. String[] discos = new String[discos_.size()];
  67. discos_.copyInto(discos);
  68. return discos;
  69. }
  70. public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
  71. {
  72. String qNameLC = qName.toLowerCase();
  73. if (qNameLC.equals(META))
  74. {
  75. String nameValue = attributes.getValue(NAME);
  76. if (SERVICE_INSPECTION.equals(nameValue))
  77. {
  78. String wsilURI = attributes.getValue(CONTENT);
  79. if (baseURI_ != null && wsilURI.indexOf(":/") == -1)
  80. {
  81. StringBuffer sb = new StringBuffer();
  82. sb.append(baseURI_.substring(0, baseURI_.lastIndexOf("/")+1));
  83. sb.append(wsilURI);
  84. wsilURI = sb.toString();
  85. }
  86. if (!wsils_.contains(wsilURI))
  87. wsils_.add(wsilURI);
  88. }
  89. }
  90. else if (qNameLC.equals(LINK))
  91. {
  92. // See http://msdn.microsoft.com/msdnmag/issues/02/02/xml/default.aspx for more details on DISCO.
  93. String type = attributes.getValue(TYPE);
  94. String rel = attributes.getValue(REL);
  95. String href = attributes.getValue(HREF);
  96. if (TEXT_XML.equals(type) && ALTERNATE.equals(rel) && href != null)
  97. {
  98. String discoURI = href;
  99. if (discoURI.indexOf(":/") == -1)
  100. {
  101. StringBuffer sb = new StringBuffer();
  102. sb.append(baseURI_.substring(0,baseURI_.lastIndexOf("/")+1));
  103. sb.append(discoURI);
  104. discoURI = sb.toString();
  105. }
  106. if (!discos_.contains(discoURI))
  107. discos_.add(discoURI);
  108. }
  109. }
  110. }
  111. public void error(SAXParseException e) throws SAXException
  112. {
  113. }
  114. public void fatalError(SAXParseException e) throws SAXException
  115. {
  116. }
  117. public void warning(SAXParseException e) throws SAXException
  118. {
  119. }
  120. /**
  121. * Appends the elements of the provided tag in the provided document to the provided StringBuffer.
  122. * @param target
  123. * @param document
  124. * @param tag
  125. * @param encoding
  126. * @return boolean false if the value of the encoding parameter matched the detected charset or if no charset was detected.
  127. * Returns true if a charset was detected and it did not equal the encoding parameter. If true is returned
  128. * the harvesting of the tags would have stopped at the point the charset was detected. The caller
  129. * should call this method again with the correct encoding.
  130. */
  131. private boolean harvestTags(StringBuffer target,String document,String tag, String encoding)
  132. {
  133. boolean changeEncoding = false;
  134. int index = document.indexOf(START_TAG);
  135. int documentLength = document.length();
  136. int tagLength = tag.length();
  137. while (index != -1 && (index+1+tagLength)<documentLength)
  138. {
  139. String str = document.substring(index+1,index+1+tagLength);
  140. if (str.toLowerCase().equals(tag))
  141. {
  142. str = document.substring(index,document.indexOf(END_TAG,index+1)+1);
  143. target.append(str);
  144. index += str.length();
  145. //If tag is META and declares the charset, find out what it is
  146. //and if it matches what was passed in. If it matches, continue
  147. //with the parsing and return false when complete.
  148. //If the detected charset is different from what was passed in,
  149. //- change byteEncoding to equal the detected charset.
  150. //- stop parsing.
  151. //- return true.
  152. if (tag.equals(META))
  153. {
  154. int idxOfContent = str.indexOf(HTML_CONTENT);
  155. int idxOfHTTPEQUIV = str.indexOf(HTTP_EQUIV);
  156. if (idxOfHTTPEQUIV!= -1 && idxOfContent != -1)
  157. {
  158. //Check if the http-equiv attribute is set to Content-Type.
  159. int idxOfHTTPEQUIVOpenQuote = str.indexOf("\"", idxOfHTTPEQUIV+1);
  160. int idxOfHTTPEQUIVClosingQuote = str.indexOf("\"", idxOfHTTPEQUIVOpenQuote+1);
  161. String hTTPEQUIVValueUntrimmed = str.substring(idxOfHTTPEQUIVOpenQuote+1, idxOfHTTPEQUIVClosingQuote);
  162. if (hTTPEQUIVValueUntrimmed.trim().equals(HTTP_EQUIV_CONTENT_TYPE))
  163. {
  164. //This META tag contains the charset. Get the value of the content attribute
  165. int idxOfOpenQuote = str.indexOf("\"", idxOfContent+1);
  166. int idxOfClosingQuote = str.indexOf("\"", idxOfOpenQuote+1);
  167. String contentValue = str.substring(idxOfOpenQuote+1, idxOfClosingQuote);
  168. //Get the charset
  169. int idxOfCharSet = contentValue.indexOf(CHARSET);
  170. int idxOfEquals = contentValue.indexOf("=", idxOfCharSet+CHARSET.length());
  171. String detectedEncodingValueUntrimmed = contentValue.substring(idxOfEquals+1);
  172. String detectedEncodingValue = detectedEncodingValueUntrimmed.trim();
  173. if (!detectedEncodingValue.equals(encoding))
  174. {
  175. byteEncoding = detectedEncodingValue;
  176. changeEncoding = true;
  177. break;
  178. }
  179. }
  180. }
  181. }
  182. }
  183. else
  184. index++;
  185. index = document.indexOf(START_TAG,index);
  186. }
  187. return changeEncoding;
  188. }
  189. /**
  190. * If the provided byte array reperesents the contents of an HTML
  191. * document, this method will return a byte array in which
  192. * <ul>
  193. * <li>the opening and closing HEAD tags are removed and replaced with
  194. * opening and closing <root> tags</li>
  195. * <li>only the META and LINK elements are in the HTML document
  196. * are included in the contents between the opening and closing
  197. * <root> tags.
  198. * </ul>
  199. * This method will modify the value of the byteEncoding String
  200. * attribute on this class if it is something other than
  201. * UTF-8. Callers of this method should call getByteEncoding()
  202. * after calling this method if they need to know the charset
  203. * value used by this method to decode/endcode the byte array.
  204. * @param b
  205. * @return byte[]
  206. */
  207. public byte[] harvestHeadTags(byte[] b)
  208. {
  209. String s;
  210. try
  211. {
  212. //Assume the default byte encoding of UTF-8 for now.
  213. s = new String(b, byteEncoding);
  214. }
  215. catch (UnsupportedEncodingException uee)
  216. {
  217. s = new String(b);
  218. }
  219. String head = s.toLowerCase();
  220. int headStartIndex = head.indexOf(HEAD_START_TAG);
  221. int headEndIndex = head.indexOf(HEAD_END_TAG);
  222. StringBuffer sb = new StringBuffer();
  223. sb.append(ROOT_START_TAG);
  224. if (headStartIndex != -1 && headEndIndex != -1)
  225. {
  226. head = s.substring(headStartIndex, headEndIndex+HEAD_END_TAG.length());
  227. boolean encodingChanged = harvestTags(sb,head,META, byteEncoding);
  228. if (encodingChanged)
  229. {
  230. //The harvestTags method detected a different charset
  231. //than the one that was passed in. Start from the beginning
  232. //with the correct charset.
  233. String s2;
  234. try
  235. {
  236. s2 = new String(b, byteEncoding);
  237. }
  238. catch (UnsupportedEncodingException uee)
  239. {
  240. s2 = new String(b);
  241. }
  242. String head2 = s2.toLowerCase();
  243. int head2StartIndex = head2.indexOf(HEAD_START_TAG);
  244. int head2EndIndex = head2.indexOf(HEAD_END_TAG);
  245. sb = new StringBuffer();
  246. sb.append(ROOT_START_TAG);
  247. if (head2StartIndex != -1 && head2EndIndex != -1)
  248. {
  249. head2 = s2.substring(head2StartIndex, head2EndIndex+HEAD_END_TAG.length());
  250. harvestTags(sb,head2,META, byteEncoding);
  251. harvestTags(sb,head2,LINK,byteEncoding);
  252. }
  253. }
  254. else
  255. {
  256. harvestTags(sb,head,LINK,byteEncoding);
  257. }
  258. }
  259. sb.append(ROOT_END_TAG);
  260. try
  261. {
  262. return sb.toString().getBytes(byteEncoding);
  263. } catch (UnsupportedEncodingException uee)
  264. {
  265. return sb.toString().getBytes();
  266. }
  267. }
  268. public String getByteEncoding()
  269. {
  270. return byteEncoding;
  271. }
  272. }