PageRenderTime 33ms CodeModel.GetById 11ms RepoModel.GetById 1ms app.codeStats 0ms

/wordcounter/src/main/java/com/stoyanr/wordcounter/WordUtils.java

https://gitlab.com/math4youbyusgroupillinois/Wordcounter
Java | 73 lines | 38 code | 6 blank | 29 comment | 12 complexity | ecf1b83b79c4400bf681384af7f3c7e2 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * $Id: $
  3. *
  4. * Copyright 2012 Stoyan Rachev (stoyanr@gmail.com)
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package com.stoyanr.wordcounter;
  19. import java.util.function.UnaryOperator;
  20. import com.stoyanr.util.CharPredicate;
  21. /**
  22. * A utility class that provides several overloaded static methods for counting words in strings.
  23. * The central method {@code countWords} accepts a string, a predicate to determine whether a
  24. * character is a word character, and an optional unary operator to be performed on words.
  25. * <p>
  26. * <pre>
  27. * // Count all words consisting of only alphabetic chars, ignoring case
  28. * WordCounts wc = WordUtils.countWords(text, (c) -> Character.isAlphabetic(c), (s) -> s.toLowerCase());
  29. * </pre>
  30. *
  31. * @author Stoyan Rachev
  32. */
  33. public class WordUtils {
  34. public static WordCounts countWords(String text, CharPredicate pred) {
  35. return countWords(text, pred, null);
  36. }
  37. public static WordCounts countWords(String text, CharPredicate pred, UnaryOperator<String> op) {
  38. assert (text != null);
  39. WordCounts result = new WordCounts();
  40. int i = 0;
  41. while (i < text.length()) {
  42. while (i < text.length() && !pred.test(text.charAt(i))) {
  43. i++;
  44. }
  45. int bi = i;
  46. while (i < text.length() && pred.test(text.charAt(i))) {
  47. i++;
  48. }
  49. int ei = i;
  50. if (bi != ei) {
  51. String word = text.substring(bi, ei);
  52. if (op != null) {
  53. word = op.apply(word);
  54. }
  55. result.add(word, 1);
  56. }
  57. }
  58. return result;
  59. }
  60. public static int getEndWordIndex(String text, CharPredicate pred) {
  61. int ei = text.length();
  62. while (ei > 0 && pred.test(text.charAt(ei - 1))) {
  63. ei--;
  64. }
  65. return ei;
  66. }
  67. }