
/src/com/atlassian/uwc/util/TokenMap.java

https://bitbucket.org/atlassianlabs/universal-wiki-connector
package com.atlassian.uwc.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
/**
 * This is a helper class to create, store and retrieve tokens.
 * <p/>
 * Certain elements such as links and code can be quite tricky
 * to convert. One issue is that you need to escape text in some places
 * but not others (like inside links).
 * <p/>
 * Use this class for anything where you want to keep syntax from
 * being escaped. VERY HELPFUL.
 */
public class TokenMap {

    protected static Logger log = Logger.getLogger("TokenMap");

    public final static String TOKEN_START = "~UWCTOKENSTART~";
    public final static String TOKEN_END = "~UWCTOKENEND~";

    private static HashMap<String, String> tokenCache = new HashMap<String, String>();
    private static Stack<String> keyStack = new Stack<String>();
    private static long tokenCounter = (new Date()).getTime();

    // backup (in case comment converter is used internally)
    private static HashMap<String, String> backupCache = new HashMap<String, String>();
    private static Stack<String> backupKeys = new Stack<String>();
    public synchronized static String add(String textToReplaceWithToken) {
        // assemble token
        tokenCounter++;
        String keyToken = TOKEN_START + tokenCounter + TOKEN_END;
        // add to Map, guarding against an already-used token key
        if (tokenCache.containsKey(keyToken)) {
            log.error("DUPLICATE TOKEN! " + tokenCounter);
            throw new Error("DUPLICATE TOKEN!");
        }
        // log.error("tokenizing: " + keyToken + ", " + textToReplaceWithToken); //COMMENT
        tokenCache.put(keyToken, textToReplaceWithToken);
        keyStack.push(keyToken);
        return keyToken;
    }
    /**
     * Retrieves a value from the map, but upon retrieving also
     * removes the value.
     *
     * @param token
     * @return original value
     */
    public synchronized static String getValueAndRemove(String token) {
        String value = tokenCache.get(token);
        tokenCache.remove(token);
        return value;
    }
    private static String racecheck = "";

    /**
     * Replaces all the tokens in the input string with the values
     * stored in the cache and then removes them from the cache to
     * keep it lean.
     *
     * @param inputText
     * @return detokenized text
     */
    public synchronized static String detokenizeText(String inputText) {
        // log.error("Detokenizing: " + inputText); //COMMENT
        String result = inputText;
        Stack<String> keys = getKeys();
        Collection<String> keysToRemove = new ArrayList<String>();
        int iteration = 1;
        int previousTokenCacheSize = tokenCache.size();
        racecheck = "";
        // sometimes tokens get tokenized in which case we have to keep unrolling, hence this while loop
        while (tokenCache.size() > 0) {
            String key = null;
            while (!keys.empty()) {
                key = keys.pop(); // We use a stack so that the detokenizing order is properly maintained UWC-398
                // log.debug("key = " + key); //COMMENT
                // if the key/token is found in the input replace it with the original value,
                // remove from the cache and iterate
                if (result.contains(key)) {
                    String value = tokenCache.get(key);
                    // log.error("detokenizing key = "+key+" value= "+value); //COMMENT
                    result = result.replace(key, value);
                // } else { //COMMENT
                //     log.error("key (" + key + ") not found for value: " + tokenCache.get(key)); //COMMENT
                }
                keysToRemove.add(key);
            }
            // clean up the cache by removing the keys that have
            // already been used. these are unique and won't be needed further
            for (String keyToRemove : keysToRemove) {
                tokenCache.remove(keyToRemove);
            }
            keysToRemove.clear();
            // log.debug("detokenizing iteration " + iteration++ + " tokenCache size = " + tokenCache.size()); //COMMENT
            // a bit arbitrary, but break out of the loop if we can't seem to get the tokens out
            if (previousTokenCacheSize == tokenCache.size() && iteration++ > 10) {
                log.info("breaking out of detokenizing loop: cache size = " + previousTokenCacheSize + " cache = " + tokenCache);
                // log.info("text = " + result); //COMMENT
                tokenCache.clear();
                keyStack.clear();
                break;
            }
            previousTokenCacheSize = tokenCache.size();
        }
        if (result.contains(TOKEN_START)) {
            log.error("Result still contains " + TOKEN_START);
        }
        return result;
    }
    public synchronized static Stack<String> getKeys() {
        return keyStack;
    }
    /**
     * If you are running an engine within a converter that might call the detokenizer,
     * call backupTokens first, so that your page's tokens aren't lost. Then when you're done
     * with your internal engine, call revertTokens.
     */
    public synchronized static void backupTokens() {
        backupCache.putAll(tokenCache);
        backupKeys.addAll(keyStack);
    }

    /**
     * If you are running an engine within a converter that might call the detokenizer,
     * call backupTokens first, so that your page's tokens aren't lost. Then when you're done
     * with your internal engine, call revertTokens.
     */
    public synchronized static void revertTokens() {
        tokenCache.putAll(backupCache);
        keyStack.addAll(backupKeys);
        backupCache.clear();
        backupKeys.clear();
    }
    /**
     * Calls replaceAndTokenize with no flags.
     *
     * @param twikiText
     * @param regex
     * @param regexReplacement
     * @return twikiText with all of the matches tokenized
     */
    public static String replaceAndTokenize(String twikiText,
                                            String regex,
                                            String regexReplacement) {
        return replaceAndTokenize(twikiText, regex, regexReplacement, 0);
    }

    /**
     * Calls replaceAndTokenize with the multi-line flags of
     * Pattern.MULTILINE|Pattern.DOTALL.
     *
     * @param twikiText
     * @param regex
     * @param regexReplacement
     * @return twikiText with all of the matches tokenized
     */
    public static String replaceAndTokenizeMultiLine(String twikiText,
                                                     String regex,
                                                     String regexReplacement) {
        return replaceAndTokenize(twikiText, regex, regexReplacement, Pattern.MULTILINE | Pattern.DOTALL);
    }
    /**
     * This method is very handy. Learn it, love it. It will save you time and
     * is great to use with things like links or other text/syntax that can be
     * easily 'messed' up by other converters.
     * <p/>
     * Basically it does these things:
     * 1) finds the match
     * 2) creates the replacement text
     * 3) puts the replacement into the TokenMap and hands back a token
     * 4) sticks the token into the original text
     * <p/>
     * Thus any successful match is then immune to further accidental tampering
     * by other converters.
     *
     * @param twikiText
     * @param regex
     * @param regexReplacement
     * @param flags regex compile flags (e.g. Pattern.MULTILINE | Pattern.DOTALL)
     * @return twikiText with all of the matches tokenized
     */
    public static String replaceAndTokenize(String twikiText,
                                            String regex,
                                            String regexReplacement,
                                            int flags) {
        if (flags == (Pattern.DOTALL | Pattern.MULTILINE)) {
            // enable multi-line mode: the compile flags alone don't seem to work,
            // so prepend the inline (?s) modifier as well
            regex = "(?s)" + regex;
        }
        // Compile the regex.
        Pattern pattern = Pattern.compile(regex, flags);
        // Get a Matcher based on the target string.
        Matcher matcher = pattern.matcher(twikiText);
        String retString = twikiText;
        // Find all the matches.
        while (matcher.find()) {
            // find the match
            String whatMatched = retString.substring(matcher.start(), matcher.end());
            // transform the match accordingly and into a token
            String replacedTheMatch = whatMatched.replaceFirst(regex, regexReplacement);
            String token = TokenMap.add(replacedTheMatch);
            //XXX Use these to debug problems
            // log.debug("regex = " + regex); //COMMENT
            // log.debug("regex replacement = " + regexReplacement); //COMMENT
            // log.debug("what matched = " + whatMatched); //COMMENT
            // log.debug("replacedTheMatch = " + replacedTheMatch); //COMMENT
            // log.debug("token = " + token); //COMMENT
            // stick the token into the original text
            retString = matcher.replaceFirst(token);
            // reset the matcher to deal with the new and altered retString
            matcher = pattern.matcher(retString);
        }
        return retString;
    }
}
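
A minimal usage sketch, assuming a hypothetical converter class, regex and replacement (only replaceAndTokenizeMultiLine and detokenizeText come from the class above): matches are swapped for unique ~UWCTOKENSTART~n~UWCTOKENEND~ placeholders so later converters cannot escape or rewrite them, and detokenizeText swaps the stored text back in once all other conversion has run.

    import com.atlassian.uwc.util.TokenMap;

    // Hypothetical sketch, not part of TokenMap.java: protect {code} blocks
    // from later converters, then restore them at the end.
    public class ExampleCodeBlockConverter {

        public String convert(String pageText) {
            // 1) Tokenize: each {code}...{code} block is stored in TokenMap's cache
            //    and replaced in the text by a unique ~UWCTOKENSTART~<n>~UWCTOKENEND~ key.
            String tokenized = TokenMap.replaceAndTokenizeMultiLine(
                    pageText,
                    "\\{code\\}(.*?)\\{code\\}",  // hypothetical regex
                    "{code}$1{code}");            // hypothetical replacement (kept verbatim here)

            // ... other converters may now escape or rewrite the text freely;
            //     the placeholder keys are inert and pass through untouched ...

            // 2) Detokenize: every placeholder is replaced by its stored value,
            //    which is then removed from the cache.
            return TokenMap.detokenizeText(tokenized);
        }
    }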
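
Similarly, a hedged sketch of the backup/revert pattern described in the Javadoc above (ExampleNestedEngineConverter and runInnerEngine are hypothetical stand-ins): the shared static cache is copied aside before a nested engine can detokenize and drain it, then merged back afterwards.

    import com.atlassian.uwc.util.TokenMap;

    // Hypothetical sketch: preserving the page's tokens around a nested engine run.
    public class ExampleNestedEngineConverter {

        public String convert(String pageText) {
            // Copy the current page's tokens aside before the inner engine runs;
            // its own call to detokenizeText would otherwise empty the shared cache.
            TokenMap.backupTokens();

            String intermediate = runInnerEngine(pageText);  // hypothetical helper

            // Merge the saved tokens back so the outer page can still be detokenized.
            TokenMap.revertTokens();
            return intermediate;
        }

        private String runInnerEngine(String text) {
            // Placeholder for the nested engine; a real converter would delegate here.
            return text;
        }
    }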