/extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/merge-dictionaries

http://github.com/zpao/v8monkey · #! · 68 lines · 52 code · 16 blank · 0 comment · 0 complexity · 2ba6ed60ee49313ab2312079f2be013b MD5 · raw file

  1. #!/bin/bash
  2. #
  3. # merge-dictionaries
  4. # 15/Apr/2010, Matt Caywood (caywood@gmail.com)
  5. # input files:
  6. CHROMIUM_START=chromium_en_US.dic_delta
  7. CHROMIUM_DIFF=upstream-chromium.diff
  8. CHROMIUM_PATCHED=$CHROMIUM_START-patched
  9. CHROMIUM_AFFIX_CONVERTED=$CHROMIUM_START-affix-converted
  10. HUNSPELL_START=hunspell-en_US-20081205.dic
  11. HUNSPELL_DIFF=upstream-hunspell.diff
  12. HUNSPELL_PATCHED=$HUNSPELL_START-patched
  13. HUNSPELL_PATCHED_STRIPPED=$HUNSPELL_PATCHED-stripped
  14. MOZILLA_START=mozilla-specific.txt
  15. MERGED_SORTED=merged-list-sorted
  16. MERGED_FINISH=en-US.dic
  17. rm -f $CHROMIUM_PATCHED $CHROMIUM_AFFIX_CONVERTED $HUNSPELL_PATCHED $HUNSPELL_PATCHED_STRIPPED $MERGED_SORTED
  18. rm -f $MERGED_FINISH
  19. # Patch Chromium ($CHROMIUM_START --> $CHROMIUM_PATCHED)
  20. echo Patching Chromium dictionary
  21. cp $CHROMIUM_START $CHROMIUM_PATCHED
  22. patch $CHROMIUM_PATCHED $CHROMIUM_DIFF
  23. # Patch Hunspell ($HUNSPELL_START --> $HUNSPELL_PATCHED)
  24. echo Patching Hunspell dictionary
  25. cp $HUNSPELL_START $HUNSPELL_PATCHED
  26. patch $HUNSPELL_PATCHED $HUNSPELL_DIFF
  27. # Chromium's dictionary uses numeric shortcuts from en-US.aff, so that /7 stands in for /MS etc.
  28. # We need to replace these with the full alphabetic affix rules.
  29. #
  30. # This line just does affix conversions for the 4 rules of over 800(!) they are currently using.
  31. # If in the future more are added, those affixes will need to be converted or else they will not be handled.
  32. echo Updating Chromium affixes
  33. sed -e 's/6/M/g;s/7/MS/g;s/12/U/g;s/30/MS\!/g;s/251/\!/g' $CHROMIUM_PATCHED > $CHROMIUM_AFFIX_CONVERTED
  34. # To check that conversion was correct, just search chromium-affix-converted for any numbers that are left over after conversion.
  35. if (grep [0123456789] $CHROMIUM_AFFIX_CONVERTED); then
  36. warn 'Some affix rules may not have been converted\n\n';
  37. fi
  38. # Strip old word count (first line) from $HUNSPELL_PATCHED
  39. sed '1d' $HUNSPELL_PATCHED > $HUNSPELL_PATCHED_STRIPPED
  40. # Combine dictionaries and sort
  41. echo Combining dictionaries
  42. sort $CHROMIUM_AFFIX_CONVERTED $HUNSPELL_PATCHED_STRIPPED $MOZILLA_START > $MERGED_SORTED
  43. # Display any dupes.
  44. perl dupe-dictionary.pl $MERGED_SORTED
  45. # If that completed OK, add line count
  46. if [ "$?" = "0" ]; then
  47. linecount=`cat $MERGED_SORTED | wc -l`
  48. echo Adding line count $linecount
  49. echo $linecount | cat - $MERGED_SORTED > $MERGED_FINISH
  50. fi
  51. # Clean up
  52. rm -f $CHROMIUM_PATCHED $CHROMIUM_AFFIX_CONVERTED $HUNSPELL_PATCHED $HUNSPELL_PATCHED_STRIPPED $MERGED_SORTED