/hudson-core/src/main/java/hudson/util/QuotedStringTokenizer.java

http://github.com/hudson/hudson · Java · 565 lines · 431 code · 34 blank · 100 comment · 19 complexity · 064a15392eec1759bbdadb06f765ce66 MD5 · raw file

  1. /**
  2. * (C) Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
  3. *
  4. * Parts of this code was taken from the Jetty project, which can be
  5. * found at http://www.mortbay.org/jetty
  6. *
  7. * Licensed to the Apache Software Foundation (ASF) under one or more
  8. * contributor license agreements. See the NOTICE file distributed with
  9. * this work for additional information regarding copyright ownership.
  10. * The ASF licenses this file to You under the Apache License, Version 2.0
  11. * (the "License"); you may not use this file except in compliance with
  12. * the License. You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. */
  22. // ========================================================================
  23. // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
  24. // ------------------------------------------------------------------------
  25. // Licensed under the Apache License, Version 2.0 (the "License");
  26. // you may not use this file except in compliance with the License.
  27. // You may obtain a copy of the License at
  28. // http://www.apache.org/licenses/LICENSE-2.0
  29. // Unless required by applicable law or agreed to in writing, software
  30. // distributed under the License is distributed on an "AS IS" BASIS,
  31. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  32. // See the License for the specific language governing permissions and
  33. // limitations under the License.
  34. // ========================================================================
  35. package hudson.util;
  36. import java.util.NoSuchElementException;
  37. import java.util.StringTokenizer;
  38. import java.util.List;
  39. import java.util.ArrayList;
  40. /* ------------------------------------------------------------ */
  /**
   * StringTokenizer with quoting support.
   *
   * This class is a copy of the java.util.StringTokenizer API and
   * the behaviour is the same, except that single and double quoted
   * string values are recognized.
   * Delimiters within quotes are not considered delimiters.
   * Quotes can be escaped with '\'.
   *
   * Instances are not thread-safe: the scanning position and the pending
   * token are kept in unsynchronized fields.
   *
   * @see java.util.StringTokenizer
   * @author Greg Wilkins (gregw)
   */
  public class QuotedStringTokenizer
      extends StringTokenizer
  {
      /** Delimiters used when the caller passes {@code null}. */
      private static final String __delim = " \t\n\r";

      /**
       * Characters that can be escaped with \.
       *
       * Others, like, say, \W will be left alone instead of becoming just W.
       * This is important to keep Hudson behave on Windows, which uses '\' as
       * the directory separator.
       */
      private static final String ESCAPABLE_CHARS = "\\\"' ";

      /* Scanner states used by hasMoreTokens(). */
      private static final int STATE_START = 0;
      private static final int STATE_TOKEN = 1;
      private static final int STATE_SINGLE_QUOTE = 2;
      private static final int STATE_DOUBLE_QUOTE = 3;

      private final String _string;
      private String _delim = __delim;
      private boolean _returnQuotes = false;
      private boolean _returnDelimiters = false;
      /** Buffer holding the token found by the most recent scan. */
      private final StringBuilder _token;
      /** True while {@link #_token} holds a token not yet handed out. */
      private boolean _hasToken = false;
      /** Index of the next character to read from {@link #_string}. */
      private int _i = 0;
      /** Index of the first character that has not been returned as part of a token yet. */
      private int _lastStart = 0;
      private boolean _double = true;
      private boolean _single = true;

      /**
       * Splits a string on the default delimiters (space, tab, newline, carriage return).
       *
       * @param str String to tokenize.
       * @return the tokens, in order of appearance.
       */
      public static String[] tokenize(String str) {
          return new QuotedStringTokenizer(str).toArray();
      }

      /**
       * Splits a string on the given delimiters.
       *
       * @param str String to tokenize.
       * @param delimiters Delimiter characters; {@code null} selects the default set.
       * @return the tokens, in order of appearance.
       */
      public static String[] tokenize(String str, String delimiters) {
          return new QuotedStringTokenizer(str, delimiters).toArray();
      }

      /* ------------------------------------------------------------ */
      /**
       *
       * @param str
       *      String to tokenize.
       * @param delim
       *      List of delimiter characters as string. Can be null, to default to ' \t\n\r'
       * @param returnDelimiters
       *      If true, {@link #nextToken()} will include the delimiters, not just tokenized
       *      tokens.
       * @param returnQuotes
       *      If true, {@link #nextToken()} will include the quotation characters when they are present.
       * @throws Error if {@code delim} contains a single or double quote character.
       */
      public QuotedStringTokenizer(String str,
                                   String delim,
                                   boolean returnDelimiters,
                                   boolean returnQuotes)
      {
          // The superclass state is never used; every public method is overridden.
          super("");
          _string = str;
          if (delim != null)
              _delim = delim;
          _returnDelimiters = returnDelimiters;
          _returnQuotes = returnQuotes;

          // Kept as Error (rather than IllegalArgumentException) so that the
          // exception type seen by existing callers does not change.
          if (_delim.indexOf('\'') >= 0 ||
              _delim.indexOf('"') >= 0)
              throw new Error("Can't use quotes as delimiters: " + _delim);

          _token = new StringBuilder(_string.length() > 1024 ? 512 : _string.length() / 2);
      }

      /* ------------------------------------------------------------ */
      /**
       * Creates a tokenizer that strips quotes from the returned tokens.
       *
       * @param str String to tokenize.
       * @param delim Delimiter characters; {@code null} selects the default set.
       * @param returnDelimiters If true, delimiters are returned as tokens too.
       */
      public QuotedStringTokenizer(String str,
                                   String delim,
                                   boolean returnDelimiters)
      {
          this(str, delim, returnDelimiters, false);
      }

      /* ------------------------------------------------------------ */
      /**
       * Creates a tokenizer that returns neither delimiters nor quotes.
       *
       * @param str String to tokenize.
       * @param delim Delimiter characters; {@code null} selects the default set.
       */
      public QuotedStringTokenizer(String str,
                                   String delim)
      {
          this(str, delim, false, false);
      }

      /* ------------------------------------------------------------ */
      /**
       * Creates a tokenizer using the default delimiters.
       *
       * @param str String to tokenize.
       */
      public QuotedStringTokenizer(String str)
      {
          this(str, null, false, false);
      }

      /**
       * Consumes all remaining tokens.
       *
       * @return the remaining tokens, in order.
       */
      public String[] toArray() {
          List<String> r = new ArrayList<String>();
          while (hasMoreTokens())
              r.add(nextToken());
          return r.toArray(new String[r.size()]);
      }

      /* ------------------------------------------------------------ */
      /**
       * Scans ahead for the next token and buffers it.
       *
       * @return true if a token is buffered and {@link #nextToken()} will succeed.
       */
      @Override
      public boolean hasMoreTokens()
      {
          // Already found a token
          if (_hasToken)
              return true;

          _lastStart = _i;

          int state = STATE_START;
          boolean escape = false;
          while (_i < _string.length())
          {
              char c = _string.charAt(_i++);

              switch (state)
              {
                  case STATE_START:
                      if (_delim.indexOf(c) >= 0)
                      {
                          if (_returnDelimiters)
                          {
                              _token.append(c);
                              return _hasToken = true;
                          }
                          // otherwise leading delimiters are silently skipped
                      }
                      else if (c == '\'' && _single)
                      {
                          if (_returnQuotes)
                              _token.append(c);
                          state = STATE_SINGLE_QUOTE;
                      }
                      else if (c == '"' && _double)
                      {
                          if (_returnQuotes)
                              _token.append(c);
                          state = STATE_DOUBLE_QUOTE;
                      }
                      else
                      {
                          _token.append(c);
                          _hasToken = true;
                          state = STATE_TOKEN;
                      }
                      break;

                  case STATE_TOKEN:
                      _hasToken = true;
                      if (escape)
                      {
                          escape = false;
                          // Keep the backslash for anything that is not escapable (e.g. Windows paths).
                          if (ESCAPABLE_CHARS.indexOf(c) < 0)
                              _token.append('\\');
                          _token.append(c);
                      }
                      else if (_delim.indexOf(c) >= 0)
                      {
                          // Leave the delimiter unread: the next scan either skips it or
                          // returns it, and a later nextToken(String) can re-interpret it
                          // under a different delimiter set.
                          _i--;
                          return _hasToken;
                      }
                      else if (c == '\'' && _single)
                      {
                          if (_returnQuotes)
                              _token.append(c);
                          state = STATE_SINGLE_QUOTE;
                      }
                      else if (c == '"' && _double)
                      {
                          if (_returnQuotes)
                              _token.append(c);
                          state = STATE_DOUBLE_QUOTE;
                      }
                      else if (c == '\\')
                      {
                          escape = true;
                      }
                      else
                          _token.append(c);
                      break;

                  case STATE_SINGLE_QUOTE:
                  case STATE_DOUBLE_QUOTE:
                      _hasToken = true;
                      if (escape)
                      {
                          escape = false;
                          // When quotes are returned the backslash has already been copied
                          // verbatim, so it must not be emitted a second time.
                          if (!_returnQuotes && ESCAPABLE_CHARS.indexOf(c) < 0)
                              _token.append('\\');
                          _token.append(c);
                      }
                      else if (c == (state == STATE_SINGLE_QUOTE ? '\'' : '"'))
                      {
                          // closing quote: back to plain token text
                          if (_returnQuotes)
                              _token.append(c);
                          state = STATE_TOKEN;
                      }
                      else if (c == '\\')
                      {
                          if (_returnQuotes)
                              _token.append(c);
                          escape = true;
                      }
                      else
                          _token.append(c);
                      break;
              }
          }

          return _hasToken;
      }

      /* ------------------------------------------------------------ */
      /**
       * Returns the next token and advances past it.
       *
       * @return the next token.
       * @throws NoSuchElementException if no tokens remain.
       */
      @Override
      public String nextToken()
          throws NoSuchElementException
      {
          if (!hasMoreTokens())
              throw new NoSuchElementException();
          String t = _token.toString();
          _token.setLength(0);
          _hasToken = false;
          // Everything up to here is consumed; nextToken(String) must not rewind past it.
          _lastStart = _i;
          return t;
      }

      /* ------------------------------------------------------------ */
      /**
       * Switches to a new delimiter set and returns the next token after the
       * current position, as {@link StringTokenizer#nextToken(String)} does.
       * A token that was only looked at through {@link #hasMoreTokens()} is
       * scanned again with the new delimiters; tokens already returned are not.
       *
       * @param delim the new delimiter characters, which stay in effect for later calls.
       * @return the next token.
       * @throws NoSuchElementException if no tokens remain.
       */
      @Override
      public String nextToken(String delim)
          throws NoSuchElementException
      {
          _delim = delim;
          _i = _lastStart;
          _token.setLength(0);
          _hasToken = false;
          return nextToken();
      }

      /* ------------------------------------------------------------ */
      /**
       * @return same as {@link #hasMoreTokens()}.
       */
      @Override
      public boolean hasMoreElements()
      {
          return hasMoreTokens();
      }

      /* ------------------------------------------------------------ */
      /**
       * @return same as {@link #nextToken()}.
       * @throws NoSuchElementException if no tokens remain.
       */
      @Override
      public Object nextElement()
          throws NoSuchElementException
      {
          return nextToken();
      }

      /* ------------------------------------------------------------ */
      /** Not implemented.
       * @return always -1.
       */
      @Override
      public int countTokens()
      {
          return -1;
      }

      /* ------------------------------------------------------------ */
      /** Quote a string.
       * The string is quoted only if quoting is required due to
       * embedded delimiters, whitespace, backslashes, quote characters or the
       * empty string.
       * @param s The string to quote.
       * @param delim Additional characters that force quoting; may be null for none.
       * @return quoted string, {@code s} itself if no quoting is needed, or null if {@code s} is null.
       */
      public static String quote(String s, String delim)
      {
          if (s == null)
              return null;
          if (s.length() == 0)
              return "\"\"";

          for (int i = 0; i < s.length(); i++)
          {
              char c = s.charAt(i);
              boolean needsQuoting = c == '\\' || c == '"' || c == '\''
                      || Character.isWhitespace(c)
                      || (delim != null && delim.indexOf(c) >= 0);
              if (needsQuoting)
              {
                  StringBuffer b = new StringBuffer(s.length() + 8);
                  quote(b, s);
                  return b.toString();
              }
          }

          return s;
      }

      /* ------------------------------------------------------------ */
      /** Quote a string.
       * Unlike {@link #quote(String, String)} the string is always wrapped in
       * double quotes, whether or not it contains special characters.
       * @param s The string to quote.
       * @return quoted string, or null if {@code s} is null.
       */
      public static String quote(String s)
      {
          if (s == null)
              return null;
          if (s.length() == 0)
              return "\"\"";

          StringBuffer b = new StringBuffer(s.length() + 8);
          quote(b, s);
          return b.toString();
      }

      /* ------------------------------------------------------------ */
      /** Quote a string into a StringBuffer.
       * The characters ", \, \n, \r, \t, \f and \b are escaped
       * @param buf The StringBuffer
       * @param s The String to quote.
       */
      public static void quote(StringBuffer buf, String s)
      {
          // Hold the buffer's monitor so the quoted text is appended as one unit.
          synchronized (buf)
          {
              buf.append('"');
              for (int i = 0; i < s.length(); i++)
              {
                  char c = s.charAt(i);
                  switch (c)
                  {
                      case '"':
                          buf.append("\\\"");
                          break;
                      case '\\':
                          buf.append("\\\\");
                          break;
                      case '\n':
                          buf.append("\\n");
                          break;
                      case '\r':
                          buf.append("\\r");
                          break;
                      case '\t':
                          buf.append("\\t");
                          break;
                      case '\f':
                          buf.append("\\f");
                          break;
                      case '\b':
                          buf.append("\\b");
                          break;
                      default:
                          buf.append(c);
                          break;
                  }
              }
              buf.append('"');
          }
      }

      /* ------------------------------------------------------------ */
      /** Unquote a string.
       * The surrounding pair of single or double quotes is removed and the
       * escapes n, r, t, f, b and the four-hex-digit unicode form are decoded;
       * any other escaped character stands for itself. A unicode escape that is
       * not followed by four hex digits is treated like any other escaped
       * character. Strings that are not wrapped in a matching pair of quotes
       * are returned unchanged.
       * @param s The string to unquote.
       * @return unquoted string
       */
      public static String unquote(String s)
      {
          if (s == null)
              return null;
          if (s.length() < 2)
              return s;

          char first = s.charAt(0);
          char last = s.charAt(s.length() - 1);
          if (first != last || (first != '"' && first != '\''))
              return s;

          int end = s.length() - 1; // index of the closing quote
          StringBuilder b = new StringBuilder(s.length() - 2);
          boolean escape = false;
          for (int i = 1; i < end; i++)
          {
              char c = s.charAt(i);

              if (escape)
              {
                  escape = false;
                  switch (c)
                  {
                      case 'n':
                          b.append('\n');
                          break;
                      case 'r':
                          b.append('\r');
                          break;
                      case 't':
                          b.append('\t');
                          break;
                      case 'f':
                          b.append('\f');
                          break;
                      case 'b':
                          b.append('\b');
                          break;
                      case 'u':
                      {
                          // i points at the 'u'; the four hex digits follow it.
                          int code = parseHex4(s, i + 1, end);
                          if (code >= 0)
                          {
                              b.append((char) code);
                              i += 4; // skip the digits just decoded
                          }
                          else
                              b.append(c);
                          break;
                      }
                      default:
                          b.append(c);
                  }
              }
              else if (c == '\\')
              {
                  escape = true;
              }
              else
                  b.append(c);
          }

          return b.toString();
      }

      /**
       * Decodes four hex digits of {@code s} starting at {@code from}.
       *
       * @return the decoded value, or -1 if fewer than four characters are
       *         available before {@code limit} or one of them is not a hex digit.
       */
      private static int parseHex4(String s, int from, int limit)
      {
          if (from + 4 > limit)
              return -1;
          int value = 0;
          for (int k = from; k < from + 4; k++)
          {
              int digit = hexValue(s.charAt(k));
              if (digit < 0)
                  return -1;
              value = (value << 4) | digit;
          }
          return value;
      }

      /** Returns the value of an ASCII hex digit, or -1 if {@code ch} is not one. */
      private static int hexValue(char ch)
      {
          if (ch >= '0' && ch <= '9') return ch - '0';
          if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
          if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
          return -1;
      }

      /* ------------------------------------------------------------ */
      /**
       * @return handle double quotes if true
       */
      public boolean getDouble()
      {
          return _double;
      }

      /* ------------------------------------------------------------ */
      /**
       * @param d handle double quotes if true
       */
      public void setDouble(boolean d)
      {
          _double = d;
      }

      /* ------------------------------------------------------------ */
      /**
       * @return handle single quotes if true
       */
      public boolean getSingle()
      {
          return _single;
      }

      /* ------------------------------------------------------------ */
      /**
       * @param single handle single quotes if true
       */
      public void setSingle(boolean single)
      {
          _single = single;
      }

      /**
       * @param b An ASCII encoded character 0-9 a-f A-F
       * @return The byte value of the character 0-15; 0 is also returned
       *         for any byte that is not a hex digit.
       */
      public static byte convertHexDigit( byte b )
      {
          if ((b >= '0') && (b <= '9')) return (byte)(b - '0');
          if ((b >= 'a') && (b <= 'f')) return (byte)(b - 'a' + 10);
          if ((b >= 'A') && (b <= 'F')) return (byte)(b - 'A' + 10);
          return 0;
      }
  }