PageRenderTime 6ms CodeModel.GetById 2ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms

/extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/merge-dictionaries

http://github.com/zpao/v8monkey
#! | 68 lines | 52 code | 16 blank | 0 comment | 0 complexity | 2ba6ed60ee49313ab2312079f2be013b MD5 | raw file
 1#!/bin/bash
 2#
 3# merge-dictionaries
 4# 15/Apr/2010, Matt Caywood (caywood@gmail.com)
 5
 6# input files:
 7CHROMIUM_START=chromium_en_US.dic_delta
 8CHROMIUM_DIFF=upstream-chromium.diff
 9CHROMIUM_PATCHED=$CHROMIUM_START-patched
10CHROMIUM_AFFIX_CONVERTED=$CHROMIUM_START-affix-converted
11
12HUNSPELL_START=hunspell-en_US-20081205.dic
13HUNSPELL_DIFF=upstream-hunspell.diff
14HUNSPELL_PATCHED=$HUNSPELL_START-patched
15HUNSPELL_PATCHED_STRIPPED=$HUNSPELL_PATCHED-stripped
16
17MOZILLA_START=mozilla-specific.txt
18
19MERGED_SORTED=merged-list-sorted
20MERGED_FINISH=en-US.dic
21
22rm -f $CHROMIUM_PATCHED $CHROMIUM_AFFIX_CONVERTED $HUNSPELL_PATCHED $HUNSPELL_PATCHED_STRIPPED $MERGED_SORTED
23rm -f $MERGED_FINISH
24
25# Patch Chromium ($CHROMIUM_START --> $CHROMIUM_PATCHED)
26echo Patching Chromium dictionary
27cp $CHROMIUM_START $CHROMIUM_PATCHED
28patch $CHROMIUM_PATCHED $CHROMIUM_DIFF
29
30# Patch Hunspell ($HUNSPELL_START --> $HUNSPELL_PATCHED)
31echo Patching Hunspell dictionary
32cp $HUNSPELL_START $HUNSPELL_PATCHED
33patch $HUNSPELL_PATCHED $HUNSPELL_DIFF
34
35# Chromium's dictionary uses numeric shortcuts from en-US.aff, so that /7 stands in for /MS etc.
36# We need to replace these with the full alphabetic affix rules.
37#
38# This line just does affix conversions for the 4 rules of over 800(!) they are currently using. 
39# If in the future more are added, those affixes will need to be converted or else they will not be handled.
40
41echo Updating Chromium affixes
42sed -e 's/6/M/g;s/7/MS/g;s/12/U/g;s/30/MS\!/g;s/251/\!/g' $CHROMIUM_PATCHED > $CHROMIUM_AFFIX_CONVERTED
43
44# To check that conversion was correct, just search chromium-affix-converted for any numbers that are left over after conversion.
45
46if (grep [0123456789] $CHROMIUM_AFFIX_CONVERTED); then 
47	warn 'Some affix rules may not have been converted\n\n'; 
48fi
49
50# Strip old word count (first line) from $HUNSPELL_PATCHED
51sed '1d' $HUNSPELL_PATCHED > $HUNSPELL_PATCHED_STRIPPED
52
53# Combine dictionaries and sort
54echo Combining dictionaries
55sort $CHROMIUM_AFFIX_CONVERTED $HUNSPELL_PATCHED_STRIPPED $MOZILLA_START > $MERGED_SORTED
56
57# Display any dupes. 
58perl dupe-dictionary.pl $MERGED_SORTED
59
60# If that completed OK, add line count
61if [ "$?" = "0" ]; then
62  linecount=`cat $MERGED_SORTED | wc -l`
63  echo Adding line count $linecount
64  echo $linecount | cat - $MERGED_SORTED > $MERGED_FINISH
65fi  
66
67# Clean up
68rm -f $CHROMIUM_PATCHED $CHROMIUM_AFFIX_CONVERTED $HUNSPELL_PATCHED $HUNSPELL_PATCHED_STRIPPED $MERGED_SORTED