PageRenderTime 24ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/itp/classes/a2z/week03/treemap/Concordance.java

https://github.com/brannondorsey/shiffman.github.io
Java | 89 lines | 56 code | 13 blank | 20 comment | 5 complexity | 0d2c3543c98b7f0479bf0ed790048723 MD5 | raw file
  1. /* Daniel Shiffman */
  2. /* Programming from A to Z */
  3. /* Simple Text Concordance */
  4. /* This is a simplified version */
  5. /* Using a Java TreeMap */
  6. import java.io.*;
  7. import java.util.*;
  8. import a2z.*;
  9. import java.util.regex.*;
  10. public class Concordance
  11. {
  12. public static void main(String[] args)
  13. {
  14. // Print out some basic info about the program
  15. System.out.println("\n\n Welcome to the java text concordance program.");
  16. System.out.println(" This program treats all characters as ") ;
  17. System.out.println(" delimiters with the following exceptions: ") ;
  18. System.out.println(" Letters A-Z and a-z ") ;
  19. System.out.println(" Numbers 0-9 ") ;
  20. System.out.println(" An apostrophe ") ;
  21. System.out.println(" All delimiters are thrown away and not kept as part of the concordance. ") ;
  22. System.out.println(" In addition, this program is case sensitive and treats uppercase letters as ") ;
  23. System.out.println(" alphabetically lower than lowercase ones.") ;
  24. System.out.println(" Also note this program was designed for use w/ Unix formatted text files.\n\n") ;
  25. String path = null;
  26. try
  27. {
  28. // Step 1, read the input file
  29. path = args[0];
  30. A2ZFileReader fr = new A2ZFileReader(path);
  31. String content = fr.getContent();
  32. // Step 2, create an empty Tree
  33. // TreeMap words = new TreeMap(); // We used to say this!
  34. // Now we use "generics" to specify what will be in the Collection
  35. TreeMap words = new TreeMap();
  36. // Step 3, break input file up into words
  37. // We are doing this with split and a regular expression
  38. String regex = "\\b";
  39. String tokens[] = content.split(regex);
  40. // We'll use a regular exrpession to match words with only characters and apostrophes
  41. // Throwing away all the punctuation (we could do this with a different split regex too)
  42. Pattern p = Pattern.compile("[a-z']+",Pattern.CASE_INSENSITIVE);
  43. // For every word
  44. for (int i = 0; i < tokens.length; i++)
  45. {
  46. String s = tokens[i].toLowerCase();
  47. // If it matches our regex, insert it in the tree
  48. Matcher m = p.matcher(s);
  49. if (m.matches()) {
  50. if (words.containsKey(s)) {
  51. Word w = (Word) words.get(s);
  52. w.count();
  53. } else {
  54. Word w = new Word(s);
  55. words.put(s,w);
  56. }
  57. }
  58. }
  59. // We're done, print out contents of Tree!
  60. System.out.println("Here are the contents of your tree:");
  61. Iterator iterator = words.values().iterator();
  62. while (iterator.hasNext()) {
  63. Word word = (Word) iterator.next();
  64. System.out.println(word.getWord() + " " + word.getCount());
  65. }
  66. // Look Ma, Error Handling!
  67. //check for IO problem with bad file
  68. } catch (IOException e) {
  69. System.out.println("There was a problem with the filename you entered.");
  70. //check for no filename entered at command prompt
  71. } catch (ArrayIndexOutOfBoundsException e)
  72. {
  73. System.out.println("Please include a filename when running this program.");
  74. }
  75. }
  76. }