PageRenderTime 54ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/saxonB/net/sf/saxon/functions/Translate.java

https://bitbucket.org/dmwelch/phdxnat_pipeline
Java | 184 lines | 99 code | 28 blank | 57 comment | 27 complexity | 9f5d888c42b6ca982c252e10a5e9bfc5 MD5 | raw file
  1. package net.sf.saxon.functions;
  2. import net.sf.saxon.expr.*;
  3. import net.sf.saxon.om.FastStringBuffer;
  4. import net.sf.saxon.om.Item;
  5. import net.sf.saxon.trans.XPathException;
  6. import net.sf.saxon.value.StringValue;
  7. import net.sf.saxon.type.ItemType;
  8. import net.sf.saxon.sort.IntToIntHashMap;
  9. import net.sf.saxon.sort.IntToIntMap;
  10. /**
  11. * Implement the XPath translate() function
  12. */
  13. public class Translate extends SystemFunction {
  14. private IntToIntMap staticMap = null;
  15. // if the second and third arguments are known statically, we build a hash table for fast
  16. // lookup at run-time.
  17. public Expression typeCheck(ExpressionVisitor visitor, ItemType contextItemType) throws XPathException {
  18. Expression e = super.typeCheck(visitor, contextItemType);
  19. if (e == this && argument[1] instanceof StringLiteral && argument[2] instanceof StringLiteral) {
  20. // second and third arguments known statically: build an index
  21. staticMap = buildMap((StringValue)((StringLiteral)argument[1]).getValue(),
  22. (StringValue)((StringLiteral)argument[2]).getValue());
  23. }
  24. return e;
  25. }
  26. /**
  27. * Evaluate the function
  28. */
  29. public Item evaluateItem(XPathContext context) throws XPathException {
  30. StringValue sv1 = (StringValue)argument[0].evaluateItem(context);
  31. if (sv1==null) {
  32. return StringValue.EMPTY_STRING;
  33. };
  34. if (staticMap != null) {
  35. CharSequence in = sv1.getStringValueCS();
  36. CharSequence sb = translateUsingMap(in, staticMap);
  37. return new StringValue(sb);
  38. }
  39. StringValue sv2 = (StringValue)argument[1].evaluateItem(context);
  40. StringValue sv3 = (StringValue)argument[2].evaluateItem(context);
  41. return StringValue.makeStringValue(translate(sv1, sv2, sv3));
  42. }
  43. /**
  44. * Get the translation map built at compile time if there is one
  45. */
  46. public IntToIntMap getStaticMap() {
  47. return staticMap;
  48. }
  49. /**
  50. * Perform the translate function
  51. */
  52. public static CharSequence translate(StringValue sv0, StringValue sv1, StringValue sv2) {
  53. // if any string contains surrogate pairs, expand everything to 32-bit characters
  54. if (sv0.containsSurrogatePairs() || sv1.containsSurrogatePairs() || sv2.containsSurrogatePairs()) {
  55. return translateUsingMap(sv0.getStringValueCS(), buildMap(sv1, sv2));
  56. }
  57. // if the size of the strings is above some threshold, use a hash map to avoid O(n*m) performance
  58. if (sv0.getLength() * sv1.getLength() > 60) {
  59. // TODO: make measurements to get the optimum cut-off point
  60. return translateUsingMap(sv0.getStringValueCS(), buildMap(sv1, sv2));
  61. }
  62. CharSequence cs0 = sv0.getStringValueCS();
  63. CharSequence cs1 = sv1.getStringValueCS();
  64. CharSequence cs2 = sv2.getStringValueCS();
  65. String st1 = cs1.toString();
  66. FastStringBuffer sb = new FastStringBuffer(cs0.length());
  67. int s2len = cs2.length();
  68. int s0len = cs0.length();
  69. for (int i=0; i<s0len; i++) {
  70. char c = cs0.charAt(i);
  71. int j = st1.indexOf(c);
  72. if (j<s2len) {
  73. sb.append(( j<0 ? c : cs2.charAt(j) ));
  74. }
  75. }
  76. return sb;
  77. }
  78. /**
  79. * Build an index
  80. * @param arg1
  81. * @param arg2
  82. * @return
  83. */
  84. private static IntToIntMap buildMap(StringValue arg1, StringValue arg2) {
  85. int[] a1 = arg1.expand();
  86. int[] a2 = arg2.expand();
  87. IntToIntMap map = new IntToIntHashMap(a1.length, 0.5);
  88. // allow plenty of free space, it's better for lookups (though worse for iteration)
  89. for (int i=0; i<a1.length; i++) {
  90. if (map.find(a1[i])) {
  91. // no action: duplicate
  92. } else {
  93. map.put(a1[i], (i>a2.length-1 ? -1 : a2[i]));
  94. }
  95. }
  96. return map;
  97. }
  98. /**
  99. * Implement the translate() function using an index built at compile time
  100. * @param in the string to be translated
  101. * @param map index built at compile time, mapping input characters to output characters. The map returns
  102. * -1 for a character that is to be deleted from the input string, Integer.MAX_VALUE for a character that is
  103. * to remain intact
  104. * @return the translated character string
  105. */
  106. public static CharSequence translateUsingMap(CharSequence in, IntToIntMap map) {
  107. int len = in.length();
  108. FastStringBuffer sb = new FastStringBuffer(len);
  109. for (int i=0; i<len; i++) {
  110. int charval;
  111. int c = in.charAt(i);
  112. if (c >= 55296 && c <= 56319) {
  113. // we'll trust the data to be sound
  114. charval = ((c - 55296) * 1024) + ((int) in.charAt(i + 1) - 56320) + 65536;
  115. i++;
  116. } else {
  117. charval = c;
  118. }
  119. int newchar = map.get(charval);
  120. if (newchar == Integer.MAX_VALUE) {
  121. // character not in map, so is not to be translated
  122. newchar = charval;
  123. }
  124. if (newchar == -1) {
  125. // no action, delete the character
  126. } else if (newchar < 65536) {
  127. sb.append((char)newchar);
  128. } else { // output a surrogate pair
  129. //To compute the numeric value of the character corresponding to a surrogate
  130. //pair, use this formula (all numbers are hex):
  131. //(FirstChar - D800) * 400 + (SecondChar - DC00) + 10000
  132. newchar -= 65536;
  133. sb.append((char)((newchar / 1024) + 55296));
  134. sb.append((char)((newchar % 1024) + 56320));
  135. }
  136. }
  137. return sb;
  138. }
  139. }
  140. //
  141. // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
  142. // you may not use this file except in compliance with the License. You may obtain a copy of the
  143. // License at http://www.mozilla.org/MPL/
  144. //
  145. // Software distributed under the License is distributed on an "AS IS" basis,
  146. // WITHOUT WARRANTY OF ANY KIND, either express or implied.
  147. // See the License for the specific language governing rights and limitations under the License.
  148. //
  149. // The Original Code is: all this file.
  150. //
  151. // The Initial Developer of the Original Code is Michael H. Kay.
  152. //
  153. // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
  154. //
  155. // Contributor(s): none.
  156. //