PageRenderTime 127ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/android/upstream/java/text/Collator.java

https://bitbucket.org/festevezga/xobotos
Java | 390 lines | 125 code | 30 blank | 235 comment | 10 complexity | fadc4448bed2b98b9ee8182e0a6a0f41 MD5 | raw file
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package java.text;
  18. import java.util.Comparator;
  19. import java.util.Locale;
  20. import libcore.icu.ICU;
  21. import libcore.icu.RuleBasedCollatorICU;
  22. /**
  23. * Performs locale-sensitive string comparison. A concrete subclass,
  24. * {@link RuleBasedCollator}, allows customization of the collation ordering by
  25. * the use of rule sets.
  26. * <p>
  27. * Following the <a href="http://www.unicode.org">Unicode Consortium</a>'s
  28. * specifications for the <a
  29. * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
  30. * Algorithm (UCA)</a>, there are 4 different levels of strength used in
  31. * comparisons:
  32. * <ul>
  33. * <li>PRIMARY strength: Typically, this is used to denote differences between
  34. * base characters (for example, "a" &lt; "b"). It is the strongest difference.
  35. * For example, dictionaries are divided into different sections by base
  36. * character.
  37. * <li>SECONDARY strength: Accents in the characters are considered secondary
  38. * differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other differences
  39. * between letters can also be considered secondary differences, depending on
  40. * the language. A secondary difference is ignored when there is a primary
  41. * difference anywhere in the strings.
  42. * <li>TERTIARY strength: Upper and lower case differences in characters are
  43. * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
  44. * "a&ograve;"). In addition, a variant of a letter differs from the base form
  45. * on the tertiary strength (such as "A" and "&#9398;"). Another example is the
  46. * difference between large and small Kana. A tertiary difference is ignored
  47. * when there is a primary or secondary difference anywhere in the strings.
  48. * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL
  49. * strength is used as a tiebreaker. The Unicode code point values of the NFD
  50. * form of each string are compared, just in case there is no difference. For
  51. * example, Hebrew cantillation marks are only distinguished at this strength.
  52. * This strength should be used sparingly, as only code point value differences
  53. * between two strings are an extremely rare occurrence. Using this strength
  54. * substantially decreases the performance for both comparison and collation key
  55. * generation APIs. This strength also increases the size of the collation key.
  56. * </ul>
  57. * <p>
  58. * This {@code Collator} deals only with two decomposition modes, the canonical
  59. * decomposition mode and one that does not use any decomposition. The
  60. * compatibility decomposition mode
  61. * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
  62. * canonical decomposition mode is set, {@code Collator} handles un-normalized
  63. * text properly, producing the same results as if the text were normalized in
  64. * NFD. If canonical decomposition is turned off, it is the user's
  65. * responsibility to ensure that all text is already in the appropriate form
  66. * before performing a comparison or before getting a {@link CollationKey}.
  67. * <p>
  68. * <em>Examples:</em>
  69. * <blockquote>
  70. *
  71. * <pre>
  72. * // Get the Collator for US English and set its strength to PRIMARY
  73. * Collator usCollator = Collator.getInstance(Locale.US);
  74. * usCollator.setStrength(Collator.PRIMARY);
  75. * if (usCollator.compare(&quot;abc&quot;, &quot;ABC&quot;) == 0) {
  76. * System.out.println(&quot;Strings are equivalent&quot;);
  77. * }
  78. * </pre>
  79. *
  80. * </blockquote>
  81. * <p>
  82. * The following example shows how to compare two strings using the collator for
  83. * the default locale.
  84. * <blockquote>
  85. *
  86. * <pre>
  87. * // Compare two strings in the default locale
  88. * Collator myCollator = Collator.getInstance();
  89. * myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
  90. * if (myCollator.compare(&quot;\u00e0\u0325&quot;, &quot;a\u0325\u0300&quot;) != 0) {
  91. * System.out.println(&quot;\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition&quot;);
  92. * myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
  93. * if (myCollator.compare(&quot;\u00e0\u0325&quot;, &quot;a\u0325\u0300&quot;) != 0) {
  94. * System.out.println(&quot;Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition&quot;);
  95. * } else {
  96. * System.out.println(&quot;\u00e0\u0325 is equal to a\u0325\u0300 with decomposition&quot;);
  97. * }
  98. * } else {
  99. * System.out.println(&quot;Error: \u00e0\u0325 should not be equal to a\u0325\u0300 without decomposition&quot;);
  100. * }
  101. * </pre>
  102. *
  103. * </blockquote>
  104. *
  105. * @see RuleBasedCollator
  106. * @see CollationKey
  107. */
  108. public abstract class Collator implements Comparator<Object>, Cloneable {
  109. /**
  110. * Decomposition mode in which no decomposition is applied before comparing.
  111. */
  112. public static final int NO_DECOMPOSITION = 0;
  113. /**
  114. * Decomposition mode in which canonical decomposition is applied.
  115. */
  116. public static final int CANONICAL_DECOMPOSITION = 1;
  117. /**
  118. * Compatibility decomposition mode. This value is not supported here:
  119. * {@link #setDecomposition(int)} rejects it with an IllegalArgumentException.
  120. */
  121. public static final int FULL_DECOMPOSITION = 2;
  122. /**
  123. * Collation strength PRIMARY, accepted by {@link #setStrength(int)}.
  124. */
  125. public static final int PRIMARY = 0;
  126. /**
  127. * Collation strength SECONDARY, accepted by {@link #setStrength(int)}.
  128. */
  129. public static final int SECONDARY = 1;
  130. /**
  131. * Collation strength TERTIARY, accepted by {@link #setStrength(int)}.
  132. */
  133. public static final int TERTIARY = 2;
  134. /**
  135. * Collation strength IDENTICAL, accepted by {@link #setStrength(int)}.
  136. */
  137. public static final int IDENTICAL = 3;
  138. RuleBasedCollatorICU icuColl;
  139. Collator(RuleBasedCollatorICU icuColl) {
  140. this.icuColl = icuColl;
  141. }
  142. /**
  143. * Constructs a new {@code Collator} instance.
  144. */
  145. protected Collator() {
  146. icuColl = new RuleBasedCollatorICU(Locale.getDefault());
  147. }
  148. /**
  149. * Returns a new collator with the same decomposition mode and
  150. * strength value as this collator.
  151. *
  152. * @return a shallow copy of this collator.
  153. * @see java.lang.Cloneable
  154. */
  155. @Override
  156. public Object clone() {
  157. try {
  158. Collator clone = (Collator) super.clone();
  159. clone.icuColl = (RuleBasedCollatorICU) icuColl.clone();
  160. return clone;
  161. } catch (CloneNotSupportedException e) {
  162. throw new AssertionError(e);
  163. }
  164. }
  165. /**
  166. * Compares two objects to determine their relative order. The objects must
  167. * be strings.
  168. *
  169. * @param object1
  170. * the first string to compare.
  171. * @param object2
  172. * the second string to compare.
  173. * @return a negative value if {@code object1} is less than {@code object2},
  174. * 0 if they are equal, and a positive value if {@code object1} is
  175. * greater than {@code object2}.
  176. * @throws ClassCastException
  177. * if {@code object1} or {@code object2} is not a {@code String}.
  178. */
  179. public int compare(Object object1, Object object2) {
  180. return compare((String) object1, (String) object2);
  181. }
  182. /**
  183. * Compares two strings to determine their relative order. Abstract: the ordering is supplied by subclasses.
  184. *
  185. * @param string1
  186. * the first string to compare.
  187. * @param string2
  188. * the second string to compare.
  189. * @return a negative value if {@code string1} is less than {@code string2},
  190. * 0 if they are equal, and a positive value if {@code string1} is
  191. * greater than {@code string2}.
  192. */
  193. public abstract int compare(String string1, String string2);
  194. /**
  195. * Compares this collator with the specified object and indicates if they
  196. * are equal.
  197. *
  198. * @param object
  199. * the object to compare with this object.
  200. * @return {@code true} if {@code object} is a {@code Collator} object and
  201. * it has the same strength and decomposition values as this
  202. * collator; {@code false} otherwise.
  203. * @see #hashCode
  204. */
  205. @Override
  206. public boolean equals(Object object) {
  207. if (!(object instanceof Collator)) {
  208. return false;
  209. }
  210. Collator collator = (Collator) object;
  211. return icuColl == null ? collator.icuColl == null : icuColl.equals(collator.icuColl);
  212. }
  213. /**
  214. * Compares two strings using the collation rules to determine if they are
  215. * equal.
  216. *
  217. * @param string1
  218. * the first string to compare.
  219. * @param string2
  220. * the second string to compare.
  221. * @return {@code true} if {@code string1} and {@code string2} are equal
  222. * using the collation rules, false otherwise.
  223. */
  224. public boolean equals(String string1, String string2) {
  225. return compare(string1, string2) == 0;
  226. }
  227. /**
  228. * Returns an array of locales for which custom {@code Collator} instances
  229. * are available.
  230. * <p>Note that Android does not support user-supplied locale service providers.
  231. */
  232. public static Locale[] getAvailableLocales() {
  233. return ICU.getAvailableCollatorLocales();
  234. }
  235. /**
  236. * Returns a {@link CollationKey} for the specified string for this collator
  237. * with the current decomposition rule and strength value. Abstract: implemented by subclasses.
  238. *
  239. * @param string
  240. * the source string that is converted into a collation key.
  241. * @return the collation key for {@code string}.
  242. */
  243. public abstract CollationKey getCollationKey(String string);
  244. /**
  245. * Returns the decomposition rule for this collator.
  246. *
  247. * @return the decomposition rule, either {@code NO_DECOMPOSITION} or
  248. * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is
  249. * not supported.
  250. */
  251. public int getDecomposition() {
  252. return decompositionMode_ICU_Java(icuColl.getDecomposition());
  253. }
  254. /**
  255. * Returns a {@code Collator} instance which is appropriate for the user's default
  256. * {@code Locale}.
  257. * See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>".
  258. */
  259. public static Collator getInstance() {
  260. return getInstance(Locale.getDefault());
  261. }
  262. /**
  263. * Returns a {@code Collator} instance which is appropriate for {@code locale}.
  264. */
  265. public static Collator getInstance(Locale locale) {
  266. if (locale == null) {
  267. throw new NullPointerException();
  268. }
  269. return new RuleBasedCollator(new RuleBasedCollatorICU(locale));
  270. }
  271. /**
  272. * Returns the strength value for this collator.
  273. *
  274. * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or
  275. * IDENTICAL.
  276. */
  277. public int getStrength() {
  278. return strength_ICU_Java(icuColl.getStrength());
  279. }
  /**
   * Returns a hash code for this collator. Re-declared abstract here, so
   * concrete subclasses have to provide their own implementation.
   *
   * @see #equals(Object)
   */
  280. @Override
  281. public abstract int hashCode();
  282. /**
  283. * Sets the decomposition rule for this collator.
  284. *
  285. * @param value
  286. * the decomposition rule, either {@code NO_DECOMPOSITION} or
  287. * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION}
  288. * is not supported.
  289. * @throws IllegalArgumentException
  290. * if the provided decomposition rule is not valid. This includes
  291. * {@code FULL_DECOMPOSITION}.
  292. */
  293. public void setDecomposition(int value) {
  294. icuColl.setDecomposition(decompositionMode_Java_ICU(value));
  295. }
  296. /**
  297. * Sets the strength value for this collator.
  298. *
  299. * @param value
  300. * the strength value, either PRIMARY, SECONDARY, TERTIARY, or
  301. * IDENTICAL.
  302. * @throws IllegalArgumentException
  303. * if the provided strength value is not valid.
  304. */
  305. public void setStrength(int value) {
  306. icuColl.setStrength(strength_Java_ICU(value));
  307. }
  308. private int decompositionMode_Java_ICU(int mode) {
  309. switch (mode) {
  310. case Collator.CANONICAL_DECOMPOSITION:
  311. return RuleBasedCollatorICU.VALUE_ON;
  312. case Collator.NO_DECOMPOSITION:
  313. return RuleBasedCollatorICU.VALUE_OFF;
  314. }
  315. throw new IllegalArgumentException();
  316. }
  317. private int decompositionMode_ICU_Java(int mode) {
  318. int javaMode = mode;
  319. switch (mode) {
  320. case RuleBasedCollatorICU.VALUE_OFF:
  321. javaMode = Collator.NO_DECOMPOSITION;
  322. break;
  323. case RuleBasedCollatorICU.VALUE_ON:
  324. javaMode = Collator.CANONICAL_DECOMPOSITION;
  325. break;
  326. }
  327. return javaMode;
  328. }
  329. private int strength_Java_ICU(int value) {
  330. switch (value) {
  331. case Collator.PRIMARY:
  332. return RuleBasedCollatorICU.VALUE_PRIMARY;
  333. case Collator.SECONDARY:
  334. return RuleBasedCollatorICU.VALUE_SECONDARY;
  335. case Collator.TERTIARY:
  336. return RuleBasedCollatorICU.VALUE_TERTIARY;
  337. case Collator.IDENTICAL:
  338. return RuleBasedCollatorICU.VALUE_IDENTICAL;
  339. }
  340. throw new IllegalArgumentException();
  341. }
  342. private int strength_ICU_Java(int value) {
  343. int javaValue = value;
  344. switch (value) {
  345. case RuleBasedCollatorICU.VALUE_PRIMARY:
  346. javaValue = Collator.PRIMARY;
  347. break;
  348. case RuleBasedCollatorICU.VALUE_SECONDARY:
  349. javaValue = Collator.SECONDARY;
  350. break;
  351. case RuleBasedCollatorICU.VALUE_TERTIARY:
  352. javaValue = Collator.TERTIARY;
  353. break;
  354. case RuleBasedCollatorICU.VALUE_IDENTICAL:
  355. javaValue = Collator.IDENTICAL;
  356. break;
  357. }
  358. return javaValue;
  359. }
  360. }